Soneson et al., Genom Biol 2016 17:12 : https://doi.org/10.1186/s13059-015-0862-3 ArrayExpress repository of the simulated datasets: https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-3766/
# libraries
library(rats)
library(DRIMSeq)
library(data.table)
library(ggplot2)
library(matrixStats)
library(parallel)
library(gganimate)
library(dplyr)
# ggplot2 themes
gth <- theme(axis.line.x= element_line(),
axis.line.y= element_line(),
strip.background= element_rect(fill= "grey95"),
strip.text.y= element_text(size= rel(1.2)),
strip.text.x= element_text(size= rel(1.1)),
panel.grid.major.x= element_blank(),
panel.grid.minor.x= element_blank(),
panel.grid.major.y= element_line(colour='grey90'),
panel.grid.minor.y= element_blank(),
panel.background= element_rect(fill = "white"),
legend.key = element_rect(fill = 'white'),
panel.spacing = unit(1, "lines") )
gth2 <- theme(axis.line.x= element_line(),
axis.text.x=element_text(angle=90),
axis.line.y= element_line(),
strip.background= element_rect(fill= "grey95"),
strip.text.y= element_text(size= rel(1.2)),
strip.text.x= element_text(size= rel(1.1)),
panel.grid.major.x= element_line(colour='grey90'),
panel.grid.minor.x= element_line(colour='grey95'),
panel.grid.major.y= element_line(colour='grey90'),
panel.grid.minor.y= element_line(colour='grey95'),
panel.background= element_rect(fill = "white"),
legend.key = element_rect(fill = 'white'),
panel.spacing = unit(1, "lines") )
gth3 <- theme(axis.line.x= element_line(),
axis.text.x=element_text(angle=90, hjust=1),
axis.line.y= element_line(),
strip.background= element_rect(fill= "grey95"),
strip.text.y= element_text(size= rel(1.2)),
strip.text.x= element_text(size= rel(1.1)),
panel.grid.major.x= element_line(colour='grey95'),
panel.grid.minor.x= element_blank(),
panel.grid.major.y= element_line(colour='grey95'),
panel.grid.minor.y= element_line(colour='grey95'),
panel.background= element_rect(fill = "white"),
legend.key = element_rect(fill = 'white'),
panel.spacing = unit(1, "lines"),
strip.placement = "outside")
basedir <- '~'
# Import SUPPA2 results
sdk <- fread(file.path(basedir, 'DE', 'suppa_soneson_Dm70-kallisto.tsv.dpsi.temp.0'))
sds <- fread(file.path(basedir, 'DE', 'suppa_soneson_Dm70-salmon.tsv.dpsi.temp.0'))
shk <- fread(file.path(basedir, 'DE', 'suppa_soneson_Hs71-kallisto.tsv.dpsi.temp.0'))
shs <- fread(file.path(basedir, 'DE', 'suppa_soneson_Hs71-salmon.tsv.dpsi.temp.0'))
# Import DRIMSeq results
# Gene-level (default)
ddk <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-kallisto.results.tsv'))
dds <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-salmon.results.tsv'))
dhk <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-kallisto.results.tsv'))
dhs <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-salmon.results.tsv'))
# Transcript-level
ddkt <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-kallisto.results-t.tsv'))
ddst <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-salmon.results-t.tsv'))
dhkt <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-kallisto.results-t.tsv'))
dhst <- fread(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-salmon.results-t.tsv'))
# Pre-comparison object (contains the abundances)
ddkr <- readRDS(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-kallisto.RDS'))
ddsr <- readRDS(file.path(basedir, 'DE', 'drimseq0_soneson_Dm70-salmon.RDS'))
dhkr <- readRDS(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-kallisto.RDS'))
dhsr <- readRDS(file.path(basedir, 'DE', 'drimseq0_soneson_Hs71-salmon.RDS'))
# Import DEXSeq results.
xds <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq0_soneson_Dm70-salmon.RDS')))
## Loading required package: DEXSeq
## Loading required package: BiocParallel
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## The following object is masked from 'package:DRIMSeq':
##
## samples
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:data.table':
##
## first, second
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:data.table':
##
## shift
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply
## Loading required package: DESeq2
##
## Attaching package: 'DESeq2'
## The following object is masked from 'package:DRIMSeq':
##
## results
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: RColorBrewer
xhs <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq0_soneson_Hs71-salmon.RDS')))
xsds <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq0-stageR_soneson_Dm70-salmon.RDS')))
xshs <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq0-stageR_soneson_Hs71-salmon.RDS')))
# Size-normalised abundances
xdsc <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq0_soneson_Dm70-salmon.RDS')), normalized=TRUE))
xhsc <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq0_soneson_Hs71-salmon.RDS')), normalized=TRUE))
# Import ground truth
dtruth <- fread(file.path(basedir, 'truth_drosophila_non_null_missing20_ms.txt'))
htruth <- fread(file.path(basedir, 'truth_human_non_null_missing20_ms.txt'))
# Import RATs results with 0.6.4 default thresholds
rdk <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-kallisto_def.RDS'))
rds <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-salmon_def.RDS'))
rhk <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-kallisto_def.RDS'))
rhs <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-salmon_def.RDS'))
# Import RATs results with different thresholds.
rdkth <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-kallisto_th4.RDS'))
rdsth <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-salmon_th4.RDS'))
rhkth <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-kallisto_th4.RDS'))
rhsth <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-salmon_th4.RDS'))
rdkth2 <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-kallisto_th5.RDS'))
rdsth2 <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Dm70-salmon_th5.RDS'))
rhkth2 <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-kallisto_th5.RDS'))
rhsth2 <- readRDS(file.path(basedir, 'DE', 'rats_soneson_Hs71-salmon_th5.RDS'))
# Import SUPPA2 results
sdk10 <- fread(file.path(basedir, 'DE', 'suppa10_soneson_Dm70-kallisto.tsv.dpsi.temp.0'))
sds10 <- fread(file.path(basedir, 'DE', 'suppa10_soneson_Dm70-salmon.tsv.dpsi.temp.0'))
shk10 <- fread(file.path(basedir, 'DE', 'suppa10_soneson_Hs71-kallisto.tsv.dpsi.temp.0'))
shs10 <- fread(file.path(basedir, 'DE', 'suppa10_soneson_Hs71-salmon.tsv.dpsi.temp.0'))
# Import DRIMSeq results
# Gene-level (default)
ddk10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-kallisto.results.tsv'))
dds10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-salmon.results.tsv'))
dhk10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-kallisto.results.tsv'))
dhs10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-salmon.results.tsv'))
# Transcript-level
ddkt10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-kallisto.results-t.tsv'))
ddst10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-salmon.results-t.tsv'))
dhkt10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-kallisto.results-t.tsv'))
dhst10 <- fread(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-salmon.results-t.tsv'))
# Pre-comparison object (contains the abundances)
ddkr10 <- readRDS(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-kallisto.RDS'))
ddsr10 <- readRDS(file.path(basedir, 'DE', 'drimseq10_soneson_Dm70-salmon.RDS'))
dhkr10 <- readRDS(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-kallisto.RDS'))
dhsr10 <- readRDS(file.path(basedir, 'DE', 'drimseq10_soneson_Hs71-salmon.RDS'))
# # Import DEXSeq results.
xds10 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq10_soneson_Dm70-salmon.RDS')))
xhs10 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq10_soneson_Hs71-salmon.RDS')))
xsds10 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq10-stageR_soneson_Dm70-salmon.RDS')))
xshs10 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq10-stageR_soneson_Hs71-salmon.RDS')))
# Size-normalised abundances
xdsc10 <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq10_soneson_Dm70-salmon.RDS')), normalized=TRUE))
xhsc10 <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq10_soneson_Hs71-salmon.RDS')), normalized=TRUE))
# Import RATs results with 0.6.4 default thresholds
rdk10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-kallisto_def.RDS'))
rds10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-salmon_def.RDS'))
rhk10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-kallisto_def.RDS'))
rhs10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-salmon_def.RDS'))
# Import RATs results with different thresholds.
rdkth10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-kallisto_th4.RDS'))
rdsth10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-salmon_th4.RDS'))
rhkth10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-kallisto_th4.RDS'))
rhsth10 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-salmon_th4.RDS'))
rdkth210 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-kallisto_th5.RDS'))
rdsth210 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Dm70-salmon_th5.RDS'))
rhkth210 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-kallisto_th5.RDS'))
rhsth210 <- readRDS(file.path(basedir, 'DE', 'rats10_soneson_Hs71-salmon_th5.RDS'))
# Import SUPPA2 results
sdk50 <- fread(file.path(basedir, 'DE', 'suppa50_soneson_Dm70-kallisto.tsv.dpsi.temp.0'))
sds50 <- fread(file.path(basedir, 'DE', 'suppa50_soneson_Dm70-salmon.tsv.dpsi.temp.0'))
shk50 <- fread(file.path(basedir, 'DE', 'suppa50_soneson_Hs71-kallisto.tsv.dpsi.temp.0'))
shs50 <- fread(file.path(basedir, 'DE', 'suppa50_soneson_Hs71-salmon.tsv.dpsi.temp.0'))
# Import DRIMSeq results
# Gene-level (default)
ddk50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-kallisto.results.tsv'))
dds50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-salmon.results.tsv'))
dhk50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-kallisto.results.tsv'))
dhs50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-salmon.results.tsv'))
# Transcript-level
ddkt50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-kallisto.results-t.tsv'))
ddst50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-salmon.results-t.tsv'))
dhkt50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-kallisto.results-t.tsv'))
dhst50 <- fread(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-salmon.results-t.tsv'))
# Pre-comparison object (contains the abundances)
ddkr50 <- readRDS(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-kallisto.RDS'))
ddsr50 <- readRDS(file.path(basedir, 'DE', 'drimseq50_soneson_Dm70-salmon.RDS'))
dhkr50 <- readRDS(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-kallisto.RDS'))
dhsr50 <- readRDS(file.path(basedir, 'DE', 'drimseq50_soneson_Hs71-salmon.RDS'))
# # Import DEXSeq results.
xds50 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq50_soneson_Dm70-salmon.RDS')))
xhs50 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq50_soneson_Hs71-salmon.RDS')))
xsds50 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq50-stageR_soneson_Dm70-salmon.RDS')))
xshs50 <- as.data.table(readRDS(file.path(basedir, 'DE', 'dexseq50-stageR_soneson_Hs71-salmon.RDS')))
# Size-normalised abundances
xdsc50 <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq50_soneson_Dm70-salmon.RDS')), normalized=TRUE))
xhsc50 <- as.data.table(counts(readRDS(file.path(basedir, 'DE', 'dexseq50_soneson_Hs71-salmon.RDS')), normalized=TRUE))
# Import RATs results with 0.6.4 default thresholds
rdk50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-kallisto_def.RDS'))
rds50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-salmon_def.RDS'))
rhk50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-kallisto_def.RDS'))
rhs50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-salmon_def.RDS'))
# Import RATs results with different thresholds.
rdkth50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-kallisto_th4.RDS'))
rdsth50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-salmon_th4.RDS'))
rhkth50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-kallisto_th4.RDS'))
rhsth50 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-salmon_th4.RDS'))
rdkth250 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-kallisto_th5.RDS'))
rdsth250 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Dm70-salmon_th5.RDS'))
rhkth250 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-kallisto_th5.RDS'))
rhsth250 <- readRDS(file.path(basedir, 'DE', 'rats50_soneson_Hs71-salmon_th5.RDS'))
In the plots that follow, the different pre-filter values are shown as different frames of the animation.
# Index DRIMSeq by gene
setkey(ddk, gene_id)
setkey(dds, gene_id)
setkey(dhk, gene_id)
setkey(dhs, gene_id)
setkey(ddkt, gene_id)
setkey(ddst, gene_id)
setkey(dhkt, gene_id)
setkey(dhst, gene_id)
# Calculate difference of proporions for DRIMSeq.
txProp <- function (d, asel=c("s1", "s2", "s3"), bsel=c("s4", "s5", "s6")) {
cnts <- as.data.table(counts(d))
setkey(cnts, gene_id, feature_id)
ctsA <- data.table(gene_id=cnts$gene_id, cnt=rowSums(subset(cnts, select=asel)))
ctsB <- data.table(gene_id=cnts$gene_id, cnt=rowSums(subset(cnts, select=bsel)))
ctsA[, total := sum(cnt), by=gene_id]
ctsB[, total := sum(cnt), by=gene_id]
ctsA[, prop := cnt / total]
ctsB[, prop := cnt / total ]
return(ctsB$prop - ctsA$prop)
}
ddkt$Dprop <- txProp(ddkr)
ddst$Dprop <- txProp(ddsr)
dhkt$Dprop <- txProp(dhkr)
dhst$Dprop <- txProp(dhsr)
rm(ddkr)
rm(ddsr)
rm(dhkr)
rm(dhsr)
ddk$maxDprop <- ddkt[, max(abs(Dprop)), by=gene_id]$V1 ; names(ddk)[length(ddk)] <- "maxDprop"
dds$maxDprop <- ddst[, max(abs(Dprop)), by=gene_id]$V1 ; names(dds)[length(dds)] <- "maxDprop"
dhk$maxDprop <- dhkt[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhk)[length(dhk)] <- "maxDprop"
dhs$maxDprop <- dhst[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhs)[length(dhs)] <- "maxDprop"
# Give shorter names to SUPPA2 columns, for ease of typing
names(sdk) <- c("event","dpsi","pval")
names(sds) <- c("event","dpsi","pval")
names(shk) <- c("event","dpsi","pval")
names(shs) <- c("event","dpsi","pval")
# Split SUPPA2 event IDs, to obtain the gene ID and transcript ID
sdk[, gene_id := sapply(as.character(sdk$event), function (x) strsplit(x, ";")[[1]][1])]
sds[, gene_id := sapply(as.character(sds$event), function (x) strsplit(x, ";")[[1]][1])]
shk[, gene_id := sapply(as.character(shk$event), function (x) strsplit(x, ";")[[1]][1])]
shs[, gene_id := sapply(as.character(shs$event), function (x) strsplit(x, ";")[[1]][1])]
sdk[, transc_id := sapply(as.character(sdk$event), function (x) strsplit(x, ";")[[1]][2])]
sds[, transc_id := sapply(as.character(sds$event), function (x) strsplit(x, ";")[[1]][2])]
shk[, transc_id := sapply(as.character(shk$event), function (x) strsplit(x, ";")[[1]][2])]
shs[, transc_id := sapply(as.character(shs$event), function (x) strsplit(x, ";")[[1]][2])]
# Index SUPPA2 by gene
setkey(sdk, gene_id)
setkey(sds, gene_id)
setkey(shk, gene_id)
setkey(shs, gene_id)
# Rename gene and transcript columns
names(xds)[1:2] <- c('geneID', 'txID')
names(xhs)[1:2] <- c('geneID', 'txID')
# Calculate DEXSeq effect size
xdsc[, geneID := xds$geneID]
xhsc[, geneID := xhs$geneID]
xdsc[, txID := xds$txID]
xhsc[, txID := xhs$txID]
xdsc[, sumA := rowSums(xdsc[, 1:3], na.rm=TRUE) ]
xdsc[, sumB := rowSums(xdsc[, 4:6], na.rm=TRUE) ]
xhsc[, sumA := rowSums(xhsc[, 1:3], na.rm=TRUE) ]
xhsc[, sumB := rowSums(xhsc[, 4:6], na.rm=TRUE) ]
xdsc[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xdsc[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xhsc[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xhsc[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xdsc[, propA := sumA/totalA]
xdsc[, propB := sumB/totalB]
xhsc[, propA := sumA/totalA]
xhsc[, propB := sumB/totalB]
xdsc[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xdsc[(is.nan(propB)), propB := NA_real_]
xhsc[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xhsc[(is.nan(propB)), propB := NA_real_]
xdsc[, Dprop := propB - propA]
xhsc[, Dprop := propB - propA]
xds[, Dprop := xdsc$Dprop]
xhs[, Dprop := xhsc$Dprop]
# Index DEXSeq by gene
setkey(xds, geneID, txID)
setkey(xhs, geneID, txID)
setkey(xsds, geneID, txID)
setkey(xshs, geneID, txID)
xsds[, Dprop := xdsc$Dprop]
xshs[, Dprop := xhsc$Dprop]
# Index DRIMSeq by gene
setkey(ddk10, gene_id)
setkey(dds10, gene_id)
setkey(dhk10, gene_id)
setkey(dhs10, gene_id)
setkey(ddkt10, gene_id)
setkey(ddst10, gene_id)
setkey(dhkt10, gene_id)
setkey(dhst10, gene_id)
ddkt10$Dprop <- txProp(ddkr10)
ddst10$Dprop <- txProp(ddsr10)
dhkt10$Dprop <- txProp(dhkr10)
dhst10$Dprop <- txProp(dhsr10)
rm(ddkr10)
rm(ddsr10)
rm(dhkr10)
rm(dhsr10)
ddk10$maxDprop <- ddkt10[, max(abs(Dprop)), by=gene_id]$V1 ; names(ddk10)[length(ddk10)] <- "maxDprop"
dds10$maxDprop <- ddst10[, max(abs(Dprop)), by=gene_id]$V1 ; names(dds10)[length(dds10)] <- "maxDprop"
dhk10$maxDprop <- dhkt10[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhk10)[length(dhk10)] <- "maxDprop"
dhs10$maxDprop <- dhst10[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhs10)[length(dhs10)] <- "maxDprop"
# Give shorter names to SUPPA2 columns, for ease of typing
names(sdk10) <- c("event","dpsi","pval")
names(sds10) <- c("event","dpsi","pval")
names(shk10) <- c("event","dpsi","pval")
names(shs10) <- c("event","dpsi","pval")
# Split SUPPA2 event IDs, to obtain the gene ID and transcript ID
sdk10[, gene_id := sapply(as.character(sdk10$event), function (x) strsplit(x, ";")[[1]][1])]
sds10[, gene_id := sapply(as.character(sds10$event), function (x) strsplit(x, ";")[[1]][1])]
shk10[, gene_id := sapply(as.character(shk10$event), function (x) strsplit(x, ";")[[1]][1])]
shs10[, gene_id := sapply(as.character(shs10$event), function (x) strsplit(x, ";")[[1]][1])]
sdk10[, transc_id := sapply(as.character(sdk10$event), function (x) strsplit(x, ";")[[1]][2])]
sds10[, transc_id := sapply(as.character(sds10$event), function (x) strsplit(x, ";")[[1]][2])]
shk10[, transc_id := sapply(as.character(shk10$event), function (x) strsplit(x, ";")[[1]][2])]
shs10[, transc_id := sapply(as.character(shs10$event), function (x) strsplit(x, ";")[[1]][2])]
# Index SUPPA2 by gene
setkey(sdk10, gene_id)
setkey(sds10, gene_id)
setkey(shk10, gene_id)
setkey(shs10, gene_id)
# Rename gene and transcript columns
names(xds10)[1:2] <- c('geneID', 'txID')
names(xhs10)[1:2] <- c('geneID', 'txID')
# Calculate DEXSeq effect size
xdsc10[, geneID := xds10$geneID]
xhsc10[, geneID := xhs10$geneID]
xdsc10[, txID := xds10$txID]
xhsc10[, txID := xhs10$txID]
xdsc10[, sumA := rowSums(xdsc10[, 1:3], na.rm=TRUE) ]
xdsc10[, sumB := rowSums(xdsc10[, 4:6], na.rm=TRUE) ]
xhsc10[, sumA := rowSums(xhsc10[, 1:3], na.rm=TRUE) ]
xhsc10[, sumB := rowSums(xhsc10[, 4:6], na.rm=TRUE) ]
xdsc10[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xdsc10[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xhsc10[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xhsc10[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xdsc10[, propA := sumA/totalA]
xdsc10[, propB := sumB/totalB]
xhsc10[, propA := sumA/totalA]
xhsc10[, propB := sumB/totalB]
xdsc10[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xdsc10[(is.nan(propB)), propB := NA_real_]
xhsc10[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xhsc10[(is.nan(propB)), propB := NA_real_]
xdsc10[, Dprop := propB - propA]
xhsc10[, Dprop := propB - propA]
xds10[, Dprop := xdsc10$Dprop]
xhs10[, Dprop := xhsc10$Dprop]
# Index DEXSeq by gene
setkey(xds10, geneID, txID)
setkey(xhs10, geneID, txID)
setkey(xsds10, geneID, txID)
setkey(xshs10, geneID, txID)
xsds10[, Dprop := xdsc10$Dprop]
xshs10[, Dprop := xhsc10$Dprop]
# Index DRIMSeq by gene
setkey(ddk50, gene_id)
setkey(dds50, gene_id)
setkey(dhk50, gene_id)
setkey(dhs50, gene_id)
setkey(ddkt50, gene_id)
setkey(ddst50, gene_id)
setkey(dhkt50, gene_id)
setkey(dhst50, gene_id)
ddkt50$Dprop <- txProp(ddkr50)
ddst50$Dprop <- txProp(ddsr50)
dhkt50$Dprop <- txProp(dhkr50)
dhst50$Dprop <- txProp(dhsr50)
rm(ddkr50)
rm(ddsr50)
rm(dhkr50)
rm(dhsr50)
ddk50$maxDprop <- ddkt50[, max(abs(Dprop)), by=gene_id]$V1 ; names(ddk50)[length(ddk50)] <- "maxDprop"
dds50$maxDprop <- ddst50[, max(abs(Dprop)), by=gene_id]$V1 ; names(dds50)[length(dds50)] <- "maxDprop"
dhk50$maxDprop <- dhkt50[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhk50)[length(dhk50)] <- "maxDprop"
dhs50$maxDprop <- dhst50[, max(abs(Dprop)), by=gene_id]$V1 ; names(dhs50)[length(dhs50)] <- "maxDprop"
# Give shorter names to SUPPA2 columns, for ease of typing
names(sdk50) <- c("event","dpsi","pval")
names(sds50) <- c("event","dpsi","pval")
names(shk50) <- c("event","dpsi","pval")
names(shs50) <- c("event","dpsi","pval")
# Split SUPPA2 event IDs, to obtain the gene ID and transcript ID
sdk50[, gene_id := sapply(as.character(sdk50$event), function (x) strsplit(x, ";")[[1]][1])]
sds50[, gene_id := sapply(as.character(sds50$event), function (x) strsplit(x, ";")[[1]][1])]
shk50[, gene_id := sapply(as.character(shk50$event), function (x) strsplit(x, ";")[[1]][1])]
shs50[, gene_id := sapply(as.character(shs50$event), function (x) strsplit(x, ";")[[1]][1])]
sdk50[, transc_id := sapply(as.character(sdk50$event), function (x) strsplit(x, ";")[[1]][2])]
sds50[, transc_id := sapply(as.character(sds50$event), function (x) strsplit(x, ";")[[1]][2])]
shk50[, transc_id := sapply(as.character(shk50$event), function (x) strsplit(x, ";")[[1]][2])]
shs50[, transc_id := sapply(as.character(shs50$event), function (x) strsplit(x, ";")[[1]][2])]
# Index SUPPA2 by gene
setkey(sdk50, gene_id)
setkey(sds50, gene_id)
setkey(shk50, gene_id)
setkey(shs50, gene_id)
# Rename gene and transcript columns
names(xds50)[1:2] <- c('geneID', 'txID')
names(xhs50)[1:2] <- c('geneID', 'txID')
# Calculate DEXSeq effect size
xdsc50[, geneID := xds50$geneID]
xhsc50[, geneID := xhs50$geneID]
xdsc50[, txID := xds50$txID]
xhsc50[, txID := xhs50$txID]
xdsc50[, sumA := rowSums(xdsc50[, 1:3], na.rm=TRUE) ]
xdsc50[, sumB := rowSums(xdsc50[, 4:6], na.rm=TRUE) ]
xhsc50[, sumA := rowSums(xhsc50[, 1:3], na.rm=TRUE) ]
xhsc50[, sumB := rowSums(xhsc50[, 4:6], na.rm=TRUE) ]
xdsc50[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xdsc50[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xhsc50[, totalA := sum(sumA, na.rm=TRUE), by=geneID]
xhsc50[, totalB := sum(sumB, na.rm=TRUE), by=geneID]
xdsc50[, propA := sumA/totalA]
xdsc50[, propB := sumB/totalB]
xhsc50[, propA := sumA/totalA]
xhsc50[, propB := sumB/totalB]
xdsc50[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xdsc50[(is.nan(propB)), propB := NA_real_]
xhsc50[(is.nan(propA)), propA := NA_real_] # Replace NaN with NA.
xhsc50[(is.nan(propB)), propB := NA_real_]
xdsc50[, Dprop := propB - propA]
xhsc50[, Dprop := propB - propA]
xds50[, Dprop := xdsc50$Dprop]
xhs50[, Dprop := xhsc50$Dprop]
# Index DEXSeq by gene
setkey(xds50, geneID, txID)
setkey(xhs50, geneID, txID)
setkey(xsds50, geneID, txID)
setkey(xshs50, geneID, txID)
xsds50[, Dprop := xdsc50$Dprop]
xshs50[, Dprop := xhsc50$Dprop]
# Verify which field defines the 1000 transcripts that were intentionally altered
cat("Values of Status")
## Values of Status
print( dtruth[, unique(ds_status)] )
## [1] 0 1
cat("Number of fly genes")
## Number of fly genes
print(dim(dtruth)[1])
## [1] 13937
cat("Number of edited genes")
## Number of edited genes
print( dim(dtruth[ds_status==1,])[1] )
## [1] 1000
dtg <- dtruth[ds_status==1, gene] # list of tweaked genes in fly
dcg <- dtruth[ds_status==0, gene] # list of control genes in fly
cat("Values of Status")
## Values of Status
print( htruth[, unique(ds_status)] )
## [1] 0 1
cat("Number of human genes")
## Number of human genes
print(dim(htruth)[1])
## [1] 20410
cat("Number of edited genes")
## Number of edited genes
print( dim(htruth[ds_status==1,])[1] )
## [1] 1000
htg <- htruth[ds_status==1, gene] # list of tweaked genes in human
hcg <- htruth[ds_status==0, gene] # list of control genes in human
cat("Number of genes not edited in fly and human respectively")
## Number of genes not edited in fly and human respectively
dxg <- rdk$Genes[!parent_id %in% dtruth$gene, parent_id]
print(length(dxg))
## [1] 1745
hxg <- rhk$Genes[!parent_id %in% htruth$gene, parent_id]
print(length(hxg))
## [1] 41483
These lists of edited and non-edited genes will serve as the basis for the evaluation of the tools. The simulated sets are restricted to protein-coding genes, whereas the quantifications were called on the full transcriptome annotations. Therefore, one would expect the expression levels of the excluded non-coding genes to be 0.
Not much info is given about the expression level and effect size of the genes making up the truth. We can use the abundances stored in the RATs output to get an idea of how the truth is distributed.
# Add flag fields, to mark the rows that are genes among the selected 1000 or the excluded.
# This prevents repeated subsetting and lookup later on.
# DRIMSeq
ddk[, selected := gene_id %in% dtg]
dds[, selected := gene_id %in% dtg]
dhk[, selected := gene_id %in% htg]
dhs[, selected := gene_id %in% htg]
ddk[, excluded := gene_id %in% dxg]
dds[, excluded := gene_id %in% dxg]
dhk[, excluded := gene_id %in% hxg]
dhs[, excluded := gene_id %in% hxg]
ddkt[, selected := gene_id %in% dtg]
ddst[, selected := gene_id %in% dtg]
dhkt[, selected := gene_id %in% htg]
dhst[, selected := gene_id %in% htg]
ddkt[, excluded := gene_id %in% dxg]
ddst[, excluded := gene_id %in% dxg]
dhkt[, excluded := gene_id %in% hxg]
dhst[, excluded := gene_id %in% hxg]
# SUPPA2
sdk[, selected := gene_id %in% dtg]
sds[, selected := gene_id %in% dtg]
shk[, selected := gene_id %in% htg]
shs[, selected := gene_id %in% htg]
sdk[, excluded := gene_id %in% dxg]
sds[, excluded := gene_id %in% dxg]
shk[, excluded := gene_id %in% hxg]
shs[, excluded := gene_id %in% hxg]
# DEXSeq
xds[, selected := geneID %in% dtg]
xhs[, selected := geneID %in% htg]
xds[, excluded := geneID %in% dxg]
xhs[, excluded := geneID %in% hxg]
xsds[, selected := geneID %in% dtg]
xshs[, selected := geneID %in% htg]
xsds[, excluded := geneID %in% dxg]
xshs[, excluded := geneID %in% hxg]
# RATs
rdk$Genes[, selected := parent_id %in% dtg]
rds$Genes[, selected := parent_id %in% dtg]
rhk$Genes[, selected := parent_id %in% htg]
rhs$Genes[, selected := parent_id %in% htg]
rdk$Transcripts[, selected := parent_id %in% dtg]
rds$Transcripts[, selected := parent_id %in% dtg]
rhk$Transcripts[, selected := parent_id %in% htg]
rhs$Transcripts[, selected := parent_id %in% htg]
rdk$Genes[, excluded := parent_id %in% dxg]
rds$Genes[, excluded := parent_id %in% dxg]
rhk$Genes[, excluded := parent_id %in% hxg]
rhs$Genes[, excluded := parent_id %in% hxg]
rdk$Transcripts[, excluded := parent_id %in% dxg]
rds$Transcripts[, excluded := parent_id %in% dxg]
rhk$Transcripts[, excluded := parent_id %in% hxg]
rhs$Transcripts[, excluded := parent_id %in% hxg]
rdkth$Genes[, selected := parent_id %in% dtg]
rdsth$Genes[, selected := parent_id %in% dtg]
rhkth$Genes[, selected := parent_id %in% htg]
rhsth$Genes[, selected := parent_id %in% htg]
rdkth$Transcripts[, selected := parent_id %in% dtg]
rdsth$Transcripts[, selected := parent_id %in% dtg]
rhkth$Transcripts[, selected := parent_id %in% htg]
rhsth$Transcripts[, selected := parent_id %in% htg]
rdkth$Genes[, excluded := parent_id %in% dxg]
rdsth$Genes[, excluded := parent_id %in% dxg]
rhkth$Genes[, excluded := parent_id %in% hxg]
rhsth$Genes[, excluded := parent_id %in% hxg]
rdkth$Transcripts[, excluded := parent_id %in% dxg]
rdsth$Transcripts[, excluded := parent_id %in% dxg]
rhkth$Transcripts[, excluded := parent_id %in% hxg]
rhsth$Transcripts[, excluded := parent_id %in% hxg]
rdkth2$Genes[, selected := parent_id %in% dtg]
rdsth2$Genes[, selected := parent_id %in% dtg]
rhkth2$Genes[, selected := parent_id %in% htg]
rhsth2$Genes[, selected := parent_id %in% htg]
rdkth2$Transcripts[, selected := parent_id %in% dtg]
rdsth2$Transcripts[, selected := parent_id %in% dtg]
rhkth2$Transcripts[, selected := parent_id %in% htg]
rhsth2$Transcripts[, selected := parent_id %in% htg]
rdkth2$Genes[, excluded := parent_id %in% dxg]
rdsth2$Genes[, excluded := parent_id %in% dxg]
rhkth2$Genes[, excluded := parent_id %in% hxg]
rhsth2$Genes[, excluded := parent_id %in% hxg]
rdkth2$Transcripts[, excluded := parent_id %in% dxg]
rdsth2$Transcripts[, excluded := parent_id %in% dxg]
rhkth2$Transcripts[, excluded := parent_id %in% hxg]
rhsth2$Transcripts[, excluded := parent_id %in% hxg]
# DRIMSeq
ddk10[, selected := gene_id %in% dtg]
dds10[, selected := gene_id %in% dtg]
dhk10[, selected := gene_id %in% htg]
dhs10[, selected := gene_id %in% htg]
ddk10[, excluded := gene_id %in% dxg]
dds10[, excluded := gene_id %in% dxg]
dhk10[, excluded := gene_id %in% hxg]
dhs10[, excluded := gene_id %in% hxg]
ddkt10[, selected := gene_id %in% dtg]
ddst10[, selected := gene_id %in% dtg]
dhkt10[, selected := gene_id %in% htg]
dhst10[, selected := gene_id %in% htg]
ddkt10[, excluded := gene_id %in% dxg]
ddst10[, excluded := gene_id %in% dxg]
dhkt10[, excluded := gene_id %in% hxg]
dhst10[, excluded := gene_id %in% hxg]
# SUPPA2
sdk10[, selected := gene_id %in% dtg]
sds10[, selected := gene_id %in% dtg]
shk10[, selected := gene_id %in% htg]
shs10[, selected := gene_id %in% htg]
sdk10[, excluded := gene_id %in% dxg]
sds10[, excluded := gene_id %in% dxg]
shk10[, excluded := gene_id %in% hxg]
shs10[, excluded := gene_id %in% hxg]
# DEXSeq
xds10[, selected := geneID %in% dtg]
xhs10[, selected := geneID %in% htg]
xds10[, excluded := geneID %in% dxg]
xhs10[, excluded := geneID %in% hxg]
xsds10[, selected := geneID %in% dtg]
xshs10[, selected := geneID %in% htg]
xsds10[, excluded := geneID %in% dxg]
xshs10[, excluded := geneID %in% hxg]
# RATs
rdk10$Genes[, selected := parent_id %in% dtg]
rds10$Genes[, selected := parent_id %in% dtg]
rhk10$Genes[, selected := parent_id %in% htg]
rhs10$Genes[, selected := parent_id %in% htg]
rdk10$Transcripts[, selected := parent_id %in% dtg]
rds10$Transcripts[, selected := parent_id %in% dtg]
rhk10$Transcripts[, selected := parent_id %in% htg]
rhs10$Transcripts[, selected := parent_id %in% htg]
rdk10$Genes[, excluded := parent_id %in% dxg]
rds10$Genes[, excluded := parent_id %in% dxg]
rhk10$Genes[, excluded := parent_id %in% hxg]
rhs10$Genes[, excluded := parent_id %in% hxg]
rdk10$Transcripts[, excluded := parent_id %in% dxg]
rds10$Transcripts[, excluded := parent_id %in% dxg]
rhk10$Transcripts[, excluded := parent_id %in% hxg]
rhs10$Transcripts[, excluded := parent_id %in% hxg]
rdkth10$Genes[, selected := parent_id %in% dtg]
rdsth10$Genes[, selected := parent_id %in% dtg]
rhkth10$Genes[, selected := parent_id %in% htg]
rhsth10$Genes[, selected := parent_id %in% htg]
rdkth10$Transcripts[, selected := parent_id %in% dtg]
rdsth10$Transcripts[, selected := parent_id %in% dtg]
rhkth10$Transcripts[, selected := parent_id %in% htg]
rhsth10$Transcripts[, selected := parent_id %in% htg]
rdkth10$Genes[, excluded := parent_id %in% dxg]
rdsth10$Genes[, excluded := parent_id %in% dxg]
rhkth10$Genes[, excluded := parent_id %in% hxg]
rhsth10$Genes[, excluded := parent_id %in% hxg]
rdkth10$Transcripts[, excluded := parent_id %in% dxg]
rdsth10$Transcripts[, excluded := parent_id %in% dxg]
rhkth10$Transcripts[, excluded := parent_id %in% hxg]
rhsth10$Transcripts[, excluded := parent_id %in% hxg]
rdkth210$Genes[, selected := parent_id %in% dtg]
rdsth210$Genes[, selected := parent_id %in% dtg]
rhkth210$Genes[, selected := parent_id %in% htg]
rhsth210$Genes[, selected := parent_id %in% htg]
rdkth210$Transcripts[, selected := parent_id %in% dtg]
rdsth210$Transcripts[, selected := parent_id %in% dtg]
rhkth210$Transcripts[, selected := parent_id %in% htg]
rhsth210$Transcripts[, selected := parent_id %in% htg]
rdkth210$Genes[, excluded := parent_id %in% dxg]
rdsth210$Genes[, excluded := parent_id %in% dxg]
rhkth210$Genes[, excluded := parent_id %in% hxg]
rhsth210$Genes[, excluded := parent_id %in% hxg]
rdkth210$Transcripts[, excluded := parent_id %in% dxg]
rdsth210$Transcripts[, excluded := parent_id %in% dxg]
rhkth210$Transcripts[, excluded := parent_id %in% hxg]
rhsth210$Transcripts[, excluded := parent_id %in% hxg]
# DRIMSeq
ddk50[, selected := gene_id %in% dtg]
dds50[, selected := gene_id %in% dtg]
dhk50[, selected := gene_id %in% htg]
dhs50[, selected := gene_id %in% htg]
ddk50[, excluded := gene_id %in% dxg]
dds50[, excluded := gene_id %in% dxg]
dhk50[, excluded := gene_id %in% hxg]
dhs50[, excluded := gene_id %in% hxg]
ddkt50[, selected := gene_id %in% dtg]
ddst50[, selected := gene_id %in% dtg]
dhkt50[, selected := gene_id %in% htg]
dhst50[, selected := gene_id %in% htg]
ddkt50[, excluded := gene_id %in% dxg]
ddst50[, excluded := gene_id %in% dxg]
dhkt50[, excluded := gene_id %in% hxg]
dhst50[, excluded := gene_id %in% hxg]
# SUPPA2
sdk50[, selected := gene_id %in% dtg]
sds50[, selected := gene_id %in% dtg]
shk50[, selected := gene_id %in% htg]
shs50[, selected := gene_id %in% htg]
sdk50[, excluded := gene_id %in% dxg]
sds50[, excluded := gene_id %in% dxg]
shk50[, excluded := gene_id %in% hxg]
shs50[, excluded := gene_id %in% hxg]
# DEXSeq
xds50[, selected := geneID %in% dtg]
xhs50[, selected := geneID %in% htg]
xds50[, excluded := geneID %in% dxg]
xhs50[, excluded := geneID %in% hxg]
xsds50[, selected := geneID %in% dtg]
xshs50[, selected := geneID %in% htg]
xsds50[, excluded := geneID %in% dxg]
xshs50[, excluded := geneID %in% hxg]
# RATs
rdk50$Genes[, selected := parent_id %in% dtg]
rds50$Genes[, selected := parent_id %in% dtg]
rhk50$Genes[, selected := parent_id %in% htg]
rhs50$Genes[, selected := parent_id %in% htg]
rdk50$Transcripts[, selected := parent_id %in% dtg]
rds50$Transcripts[, selected := parent_id %in% dtg]
rhk50$Transcripts[, selected := parent_id %in% htg]
rhs50$Transcripts[, selected := parent_id %in% htg]
rdk50$Genes[, excluded := parent_id %in% dxg]
rds50$Genes[, excluded := parent_id %in% dxg]
rhk50$Genes[, excluded := parent_id %in% hxg]
rhs50$Genes[, excluded := parent_id %in% hxg]
rdk50$Transcripts[, excluded := parent_id %in% dxg]
rds50$Transcripts[, excluded := parent_id %in% dxg]
rhk50$Transcripts[, excluded := parent_id %in% hxg]
rhs50$Transcripts[, excluded := parent_id %in% hxg]
rdkth50$Genes[, selected := parent_id %in% dtg]
rdsth50$Genes[, selected := parent_id %in% dtg]
rhkth50$Genes[, selected := parent_id %in% htg]
rhsth50$Genes[, selected := parent_id %in% htg]
rdkth50$Transcripts[, selected := parent_id %in% dtg]
rdsth50$Transcripts[, selected := parent_id %in% dtg]
rhkth50$Transcripts[, selected := parent_id %in% htg]
rhsth50$Transcripts[, selected := parent_id %in% htg]
rdkth50$Genes[, excluded := parent_id %in% dxg]
rdsth50$Genes[, excluded := parent_id %in% dxg]
rhkth50$Genes[, excluded := parent_id %in% hxg]
rhsth50$Genes[, excluded := parent_id %in% hxg]
rdkth50$Transcripts[, excluded := parent_id %in% dxg]
rdsth50$Transcripts[, excluded := parent_id %in% dxg]
rhkth50$Transcripts[, excluded := parent_id %in% hxg]
rhsth50$Transcripts[, excluded := parent_id %in% hxg]
rdkth250$Genes[, selected := parent_id %in% dtg]
rdsth250$Genes[, selected := parent_id %in% dtg]
rhkth250$Genes[, selected := parent_id %in% htg]
rhsth250$Genes[, selected := parent_id %in% htg]
rdkth250$Transcripts[, selected := parent_id %in% dtg]
rdsth250$Transcripts[, selected := parent_id %in% dtg]
rhkth250$Transcripts[, selected := parent_id %in% htg]
rhsth250$Transcripts[, selected := parent_id %in% htg]
rdkth250$Genes[, excluded := parent_id %in% dxg]
rdsth250$Genes[, excluded := parent_id %in% dxg]
rhkth250$Genes[, excluded := parent_id %in% hxg]
rhsth250$Genes[, excluded := parent_id %in% hxg]
rdkth250$Transcripts[, excluded := parent_id %in% dxg]
rdsth250$Transcripts[, excluded := parent_id %in% dxg]
rhkth250$Transcripts[, excluded := parent_id %in% hxg]
rhsth250$Transcripts[, excluded := parent_id %in% hxg]
Plot of aggregated expression across all isoforms for each gene. Categorised by coding genes selected for DTU creation, coding genes not selected, and non-coding genes that were excluded from the expression simulation.
# Gene expression level of genes selected to be true DTU events.
df <- data.table(expression= c(rdk$Transcripts[, unique(totalA), by=parent_id]$V1,
rds$Transcripts[, unique(totalA), by=parent_id]$V1,
rhk$Transcripts[, unique(totalA), by=parent_id]$V1,
rhs$Transcripts[, unique(totalA), by=parent_id]$V1),
selected= c(rdk$Transcripts[, unique(selected), by=parent_id]$V1,
rds$Transcripts[, unique(selected), by=parent_id]$V1,
rhk$Transcripts[, unique(selected), by=parent_id]$V1,
rhs$Transcripts[, unique(selected), by=parent_id]$V1),
excluded= c(rdk$Transcripts[, unique(excluded), by=parent_id]$V1,
rds$Transcripts[, unique(excluded), by=parent_id]$V1,
rhk$Transcripts[, unique(excluded), by=parent_id]$V1,
rhs$Transcripts[, unique(excluded), by=parent_id]$V1),
cond= c(rep('Fruitfly', dim(rdk$Genes)[1] + dim(rds$Genes)[1]),
rep('Human', dim(rhk$Genes)[1] + dim(rhs$Genes)[1])),
quant= c(rep('Kallisto', dim(rdk$Genes)[1]), rep('Salmon', dim(rds$Genes)[1]),
rep('Kallisto', dim(rhk$Genes)[1]), rep('Salmon', dim(rhs$Genes)[1])))
df$SelectedCoding <- paste(as.character(df$selected), as.character(!df$excluded))
p <- ggplot(df, aes(x=expression, fill= SelectedCoding)) + gth +
facet_grid(cond ~ quant, scales='fixed') +
geom_histogram(position=position_dodge()) +
scale_fill_manual(values=c('gold', 'firebrick1', 'steelblue')) +
labs(x='total # isoform-length normalised reads', y='gene count', title='Gene expression levels in truth subset')
print(p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#print(p + scale_x_continuous(limits=c(NA,100000)) + coord_cartesian(ylim=c(0,2000)))
print(p + scale_x_continuous(limits=c(NA,15000)) + coord_cartesian(ylim=c(0,500)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4020 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
print(p + scale_x_continuous(limits=c(NA,2500)) + coord_cartesian(ylim=c(0,20)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21107 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
How’s the effect size distributed between the genes selected for abundance editing, versus all the remaining genes (including the genes excluded from the simulation of abundances).
# Obtain FX size from RATs output.
# Since pre-filtering does not physically remove entries from RATs
# we have to use the `elig` flag to plot the data the meets the threshold.
df <- rbind(data.table(expression= c(rdk$Transcripts[(elig), Dprop],
rds$Transcripts[(elig), Dprop],
rhk$Transcripts[(elig), Dprop],
rhs$Transcripts[(elig), Dprop]),
selected= c(rdk$Transcripts[(elig), selected],
rds$Transcripts[(elig), selected],
rhk$Transcripts[(elig), selected],
rhs$Transcripts[(elig), selected]),
excluded= c(rdk$Transcripts[(elig), excluded],
rds$Transcripts[(elig), excluded],
rhk$Transcripts[(elig), excluded],
rhs$Transcripts[(elig), excluded]),
cond= c(rep('Fruitfly', dim(rdk$Transcripts[(elig),])[1] + dim(rds$Transcripts[(elig),])[1]),
rep('Human', dim(rhk$Transcripts[(elig),])[1] + dim(rhs$Transcripts[(elig),])[1])),
quant= c(rep('Kallisto', dim(rdk$Transcripts[(elig),])[1]), rep('Salmon', dim(rds$Transcripts[(elig),])[1]),
rep('Kallisto', dim(rhk$Transcripts[(elig),])[1]), rep('Salmon', dim(rhs$Transcripts[(elig),])[1])),
prefilter=0),
data.table(expression= c(rdk10$Transcripts[(elig), Dprop],
rds10$Transcripts[(elig), Dprop],
rhk10$Transcripts[(elig), Dprop],
rhs10$Transcripts[(elig), Dprop]),
selected= c(rdk10$Transcripts[(elig), selected],
rds10$Transcripts[(elig), selected],
rhk10$Transcripts[(elig), selected],
rhs10$Transcripts[(elig), selected]),
excluded= c(rdk10$Transcripts[(elig), excluded],
rds10$Transcripts[(elig), excluded],
rhk10$Transcripts[(elig), excluded],
rhs10$Transcripts[(elig), excluded]),
cond= c(rep('Fruitfly', dim(rdk10$Transcripts[(elig),])[1] + dim(rds10$Transcripts[(elig),])[1]),
rep('Human', dim(rhk10$Transcripts[(elig),])[1] + dim(rhs10$Transcripts[(elig),])[1])),
quant= c(rep('Kallisto', dim(rdk10$Transcripts[(elig),])[1]), rep('Salmon', dim(rds10$Transcripts[(elig),])[1]),
rep('Kallisto', dim(rhk10$Transcripts[(elig),])[1]), rep('Salmon', dim(rhs10$Transcripts[(elig),])[1])),
prefilter=10),
data.table(expression= c(rdk50$Transcripts[(elig), Dprop],
rds50$Transcripts[(elig), Dprop],
rhk50$Transcripts[(elig), Dprop],
rhs50$Transcripts[(elig), Dprop]),
selected= c(rdk50$Transcripts[(elig), selected],
rds50$Transcripts[(elig), selected],
rhk50$Transcripts[(elig), selected],
rhs50$Transcripts[(elig), selected]),
excluded= c(rdk50$Transcripts[(elig), excluded],
rds50$Transcripts[(elig), excluded],
rhk50$Transcripts[(elig), excluded],
rhs50$Transcripts[(elig), excluded]),
cond= c(rep('Fruitfly', dim(rdk50$Transcripts[(elig),])[1] + dim(rds50$Transcripts[(elig),])[1]),
rep('Human', dim(rhk50$Transcripts[(elig),])[1] + dim(rhs50$Transcripts[(elig),])[1])),
quant= c(rep('Kallisto', dim(rdk50$Transcripts[(elig),])[1]), rep('Salmon', dim(rds50$Transcripts[(elig),])[1]),
rep('Kallisto', dim(rhk50$Transcripts[(elig),])[1]), rep('Salmon', dim(rhs50$Transcripts[(elig),])[1])),
prefilter=50)
)
df$SelectedCoding <- paste(as.character(df$selected), as.character(!df$excluded))
p <- ggplot(df, aes(x = expression, fill = SelectedCoding)) + gth +
facet_grid(cond ~ quant, scales='fixed') +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_histogram(position = position_dodge()) +
scale_fill_manual(values=c('FALSE FALSE'='gold', 'FALSE TRUE'='firebrick1', 'TRUE TRUE'='steelblue')) +
labs(x='Dprop', title='RATs effect size distribution', subtitle="Abundance prefilter: {closest_state}")
anim_save("analysis_sim_v3_truthFXdist_rats.gif", p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
anim_save("analysis_sim_v3_truthFXdist_rats1.gif", p + coord_cartesian(ylim=c(0,10000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
anim_save("analysis_sim_v3_truthFXdist_rats2.gif", p + coord_cartesian(ylim=c(0,1000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Are there really impossible Dprop values below -1? (there are NA Dprop values, so the outcome of this query will be either TRUE or NA)
print( any(rhs$Transcripts$Dprop < -1) )
## [1] NA
# Obtain FX size from SUPPA2 output.
df <- rbind(data.table(expression= c(sdk[, dpsi], sds[, dpsi], shk[, dpsi], shs[, dpsi]),
selected= c(sdk[, selected], sds[, selected], shk[, selected], shs[, selected]),
excluded= c(sdk[, excluded], sds[, excluded], shk[, excluded], shs[, excluded]),
cond= c(rep('Fruitfly', dim(sdk)[1] + dim(sds)[1]), rep('Human', dim(shk)[1] + dim(shs)[1])),
quant= c(rep('Kallisto', dim(sdk)[1]), rep('Salmon', dim(sds)[1]), rep('Kallisto', dim(shk)[1]), rep('Salmon', dim(shs)[1])),
prefilter=0),
data.table(expression= c(sdk10[, dpsi], sds10[, dpsi], shk10[, dpsi], shs10[, dpsi]),
selected= c(sdk10[, selected], sds10[, selected], shk10[, selected], shs10[, selected]),
excluded= c(sdk10[, excluded], sds10[, excluded], shk10[, excluded], shs10[, excluded]),
cond= c(rep('Fruitfly', dim(sdk10)[1] + dim(sds10)[1]), rep('Human', dim(shk10)[1] + dim(shs10)[1])),
quant= c(rep('Kallisto', dim(sdk10)[1]), rep('Salmon', dim(sds10)[1]), rep('Kallisto', dim(shk10)[1]), rep('Salmon', dim(shs10)[1])),
prefilter=10),
data.table(expression= c(sdk50[, dpsi], sds50[, dpsi], shk50[, dpsi], shs50[, dpsi]),
selected= c(sdk50[, selected], sds50[, selected], shk50[, selected], shs50[, selected]),
excluded= c(sdk50[, excluded], sds50[, excluded], shk50[, excluded], shs50[, excluded]),
cond= c(rep('Fruitfly', dim(sdk50)[1] + dim(sds50)[1]), rep('Human', dim(shk50)[1] + dim(shs50)[1])),
quant= c(rep('Kallisto', dim(sdk50)[1]), rep('Salmon', dim(sds50)[1]), rep('Kallisto', dim(shk50)[1]), rep('Salmon', dim(shs50)[1])),
prefilter=50)
)
df$SelectedCoding <- paste(as.character(df$selected), as.character(!df$excluded))
p <- ggplot(df, aes(x = expression, fill = SelectedCoding)) + gth +
facet_grid(cond ~ quant, scales='fixed') +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_histogram(position=position_dodge()) +
scale_fill_manual(values=c('FALSE FALSE'='gold', 'FALSE TRUE'='firebrick1', 'TRUE TRUE'='steelblue')) +
labs(x='dPSI', title='SUPPA2 effect size distribution', subtitle="Abundance prefilter: {closest_state}")
anim_save("analysis_sim_v3_truthFXdist_suppa.gif", p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 519294 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_suppa1.gif", p + coord_cartesian(ylim=c(0,10000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 519294 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_suppa2.gif", p + coord_cartesian(ylim=c(0,1000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 519294 rows containing non-finite values (stat_bin).
# Obtained FX size from DRIMSeq pre-comparison object.
df <- rbind(data.table(expression= c(ddkt[, Dprop], ddst[, Dprop], dhkt[, Dprop], dhst[, Dprop]),
selected= c(ddkt[, selected], ddst[, selected], dhkt[, selected],dhst[, selected]),
excluded= c(ddkt[, excluded], ddst[, excluded], dhkt[, excluded],dhst[, excluded]),
cond= c(rep('Fruitfly', dim(ddkt)[1] + dim(ddst)[1]), rep('Human', dim(dhkt)[1] + dim(dhst)[1])),
quant= c(rep('Kallisto', dim(ddkt)[1]), rep('Salmon', dim(ddst)[1]), rep('Kallisto', dim(dhkt)[1]), rep('Salmon', dim(dhst)[1])),
prefilter=0),
data.table(expression= c(ddkt10[, Dprop], ddst10[, Dprop], dhkt10[, Dprop], dhst10[, Dprop]),
selected= c(ddkt10[, selected], ddst10[, selected], dhkt10[, selected], dhst10[, selected]),
excluded= c(ddkt10[, excluded], ddst10[, excluded], dhkt10[, excluded], dhst10[, excluded]),
cond= c(rep('Fruitfly', dim(ddkt10)[1] + dim(ddst10)[1]), rep('Human', dim(dhkt10)[1] + dim(dhst10)[1])),
quant= c(rep('Kallisto', dim(ddkt10)[1]), rep('Salmon', dim(ddst10)[1]), rep('Kallisto', dim(dhkt10)[1]), rep('Salmon', dim(dhst10)[1])),
prefilter=10),
data.table(expression= c(ddkt50[, Dprop], ddst50[, Dprop], dhkt50[, Dprop], dhst50[, Dprop]),
selected= c(ddkt50[, selected], ddst50[, selected], dhkt50[, selected], dhst50[, selected]),
excluded= c(ddkt50[, excluded], ddst50[, excluded], dhkt50[, excluded], dhst50[, excluded]),
cond= c(rep('Fruitfly', dim(ddkt50)[1] + dim(ddst50)[1]), rep('Human', dim(dhkt50)[1] + dim(dhst50)[1])),
quant= c(rep('Kallisto', dim(ddkt50)[1]), rep('Salmon', dim(ddst50)[1]), rep('Kallisto', dim(dhkt50)[1]), rep('Salmon', dim(dhst50)[1])),
prefilter=50)
)
df$SelectedCoding <- paste(as.character(df$selected), as.character(!df$excluded))
p <- ggplot(df, aes(x = expression, fill = selected)) + gth +
facet_grid(cond ~ quant, scales='fixed') +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_histogram(position=position_dodge()) +
scale_fill_manual(values=c('FALSE FALSE'='gold', 'FALSE TRUE'='firebrick1', 'TRUE TRUE'='steelblue')) +
labs(x='Dprop', title='DRIMSeq effect size distribution', subtitle="Abundance prefilter: {closest_state}")
anim_save("analysis_sim_v3_truthFXdist_drimseq.gif", p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 666 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_drimseq1.gif", p + coord_cartesian(ylim=c(0,10000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 666 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_drimseq2.gif", p + coord_cartesian(ylim=c(0,1000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 666 rows containing non-finite values (stat_bin).
# Obtain FX size from DEXSeq output.
df <- rbind(data.table(expression= c(xds[, Dprop], xhs[, Dprop]),
selected= c(xds[, selected], xhs[, selected]),
excluded= c(xds[, excluded], xhs[, excluded]),
cond= c(rep('Fruitfly', dim(xds)[1]), rep('Human', dim(xhs)[1])),
quant= c(rep('Salmon', dim(xds)[1]), rep('Salmon', dim(xhs)[1])),
prefilter=0),
data.table(expression= c(xds10[, Dprop], xhs10[, Dprop]),
selected= c(xds10[, selected], xhs10[, selected]),
excluded= c(xds10[, excluded], xhs10[, excluded]),
cond= c(rep('Fruitfly', dim(xds10)[1]), rep('Human', dim(xhs10)[1])),
quant= c(rep('Salmon', dim(xds10)[1]), rep('Salmon', dim(xhs10)[1])),
prefilter=10),
data.table(expression= c(xds50[, Dprop], xhs50[, Dprop]),
selected= c(xds50[, selected], xhs50[, selected]),
excluded= c(xds10[, excluded], xhs10[, excluded]),
cond= c(rep('Fruitfly', dim(xds50)[1]), rep('Human', dim(xhs50)[1])),
quant= c(rep('Salmon', dim(xds50)[1]), rep('Salmon', dim(xhs50)[1])),
prefilter=50)
)
df$SelectedCoding <- paste(as.character(df$selected), as.character(!df$excluded))
p <- ggplot(df, aes(x = expression, fill = selected)) + gth +
facet_grid(cond ~ quant, scales='fixed') +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_histogram(position=position_dodge()) +
scale_fill_manual(values=c('FALSE FALSE'='gold', 'FALSE TRUE'='firebrick1', 'TRUE TRUE'='steelblue')) +
labs(x='Dprop', title='DEXSeq effect size distribution', subtitle="Abundance prefilter: {closest_state}")
anim_save("analysis_sim_v3_truthFXdist_dexseq.gif", p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 290704 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_dexseq1.gif", p + coord_cartesian(ylim=c(0,10000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 290704 rows containing non-finite values (stat_bin).
anim_save("analysis_sim_v3_truthFXdist_dexseq2.gif", p + coord_cartesian(ylim=c(0,1000)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 290704 rows containing non-finite values (stat_bin).
The truth consists of swapping abundance for the two most abundant isoforms of 1000 well-expressed genes in each dataset. Ideally, tools are expected to identify all of these 1000 genes or 2000 transcripts, and only those. As all genes were selected to have high expression, false negatives are expected only for very small effect sizes. No a-priori limit was used in generating the dataset, and no such pre-filter was applied prior to DTU calling. As all other genes and transcripts should be completely unchanged by design, all false positives must be attributed to random fluctuations in the simulation and quantification processes: The datasets were simulated to identical final abundances, except for the 1000 selected genes, but the reads were created independently for each set. Thus, the exact set of reads would influence the quantification.
We track the following values:
# Colour code
col <- c('TP'='green', 'FP'='red', 'TN'='blue', 'FN'='darkorange', 'TNA'='grey50', 'FNA'='pink')
SUPPA2 and DRIMSeq do not apply thresholds or classify DTU events. They return the test results raw. RATs also reports the raw results, but the reproducibility feature requires that specific significance, effect size and noise thresholds be defined in advance, and any post-hoc change of these thresholds invalidates the reproducibility measurements. However, the reproducibility thresholds can be adjusted post-hoc, with no need for a rerun.
The simulated sets were focused on coding genes only. Non-coding genes would be expected to appear as non-expressed and could be added to the FP and TN. Here, we opted to calculate the performance metrics using only the genes explicitly designated as either positive or negative.
# Count TP, FP, TN, FN
# Not used directly.
countup <- function(x, tool, fx, qt=0.95, rt=0.85) {
if (tool=='RATs(g)') { # RATs Gene level
# Re-classify DTU
dk <- x[[1]]$Gene[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt]
ds <- x[[2]]$Gene[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt]
hk <- x[[3]]$Gene[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt]
hs <- x[[4]]$Gene[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt]
# Structure.
return( data.table(tp= c(dim( x[[1]]$Genes[dk & selected, TRUE])[1],
dim(x[[2]]$Genes[ds & selected, TRUE])[1],
dim(x[[3]]$Genes[hk & selected, TRUE])[1],
dim(x[[4]]$Genes[hs & selected, TRUE])[1] ),
fp= c(dim(x[[1]]$Genes[dk & !(selected | excluded), TRUE])[1],
dim(x[[2]]$Genes[ds & !(selected | excluded), TRUE])[1],
dim(x[[3]]$Genes[hk & !(selected | excluded), TRUE])[1],
dim(x[[4]]$Genes[hs & !(selected | excluded), TRUE])[1] ),
tn= c(dim(x[[1]]$Genes[(!dk) & !(selected | excluded), TRUE])[1],
dim(x[[2]]$Genes[(!ds) & !(selected | excluded), TRUE])[1],
dim(x[[3]]$Genes[(!hk) & !(selected | excluded), TRUE])[1],
dim(x[[4]]$Genes[(!hs) & !(selected | excluded), TRUE])[1] ),
fn= c(dim(x[[1]]$Genes[(!dk) & selected, TRUE])[1],
dim(x[[2]]$Genes[(!ds) & selected, TRUE])[1],
dim(x[[3]]$Genes[(!hk) & selected, TRUE])[1],
dim(x[[4]]$Genes[(!hs) & selected, TRUE])[1] ),
fna= c(dim(x[[1]]$Genes[dk & excluded, TRUE])[1],
dim(x[[2]]$Genes[ds & excluded, TRUE])[1],
dim(x[[3]]$Genes[hk & excluded, TRUE])[1],
dim(x[[4]]$Genes[hs & excluded, TRUE])[1] ),
tna= c(dim(x[[1]]$Genes[(!dk) & !excluded, TRUE])[1],
dim(x[[2]]$Genes[(!ds) & !excluded, TRUE])[1],
dim(x[[3]]$Genes[(!hk) & !excluded, TRUE])[1],
dim(x[[4]]$Genes[(!hs) & !excluded, TRUE])[1] ),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Kallisto', 'Salmon', 'Kallisto', 'Salmon') ) )
} else if (tool=='RATs(t)') { # RATs transcript level
# Reclassify DTU.
tmp <- data.table(gene = x[[1]]$Transcript$parent_id, sel = x[[1]]$Transcript$selected, xcl = x[[1]]$Transcript$excluded,
thit = x[[1]]$Transcripts[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt], ghit = NA)
tmp[, ghit := any(thit), by= gene]
dk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[2]]$Transcript$parent_id, sel = x[[2]]$Transcript$selected, xcl = x[[2]]$Transcript$excluded,
thit = x[[2]]$Transcripts[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt], ghit = NA)
tmp[, ghit := any(thit), by= gene]
ds <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[3]]$Transcript$parent_id, sel = x[[3]]$Transcript$selected, xcl = x[[3]]$Transcript$excluded,
thit = x[[3]]$Transcript[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt], ghit = NA)
tmp[, ghit := any(thit), by= gene]
hk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[4]]$Transcript$parent_id, sel = x[[4]]$Transcript$selected, xcl = x[[4]]$Transcript$excluded,
thit = x[[4]]$Transcript[, !is.na(quant_dtu_freq) & quant_dtu_freq >= qt & !is.na(rep_dtu_freq) & rep_dtu_freq >= rt], ghit = NA)
tmp[, ghit := any(thit), by= gene]
hs <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
# Structure.
return( data.table(tp= c(dk['tp'], ds['tp'], hk['tp'], hs['tp']),
fp= c(dk['fp'], ds['fp'], hk['fp'], hs['fp']),
fn= c(dk['fn'], ds['fn'], hk['fn'], hs['fn']),
tn= c(dk['tn'], ds['tn'], hk['tn'], hs['tn']),
fna= c(dk['fna'], ds['fna'], hk['fna'], hs['fna']),
tna= c(dk['tna'], ds['tna'], hk['tna'], hs['tna']),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Kallisto', 'Salmon', 'Kallisto', 'Salmon') ) )
} else if (tool=='SUPPA2') {
# Apply thresholds and aggregate by gene
tmp <- data.table(gene = x[[1]]$gene_id, sel = x[[1]]$selected, xcl = x[[1]]$excluded, thit = x[[1]]$dpsi >= fx & x[[1]]$pval < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
dk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[2]]$gene_id, sel = x[[2]]$selected, xcl = x[[2]]$excluded, thit = x[[2]]$dpsi >= fx & x[[2]]$pval < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
ds <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[3]]$gene_id, sel = x[[3]]$selected, xcl = x[[3]]$excluded, thit = x[[3]]$dpsi >= fx & x[[3]]$pval < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
hk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[4]]$gene_id, sel = x[[4]]$selected, xcl = x[[4]]$excluded, thit = x[[4]]$dpsi >= fx & x[[4]]$pval < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
hs <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
# Lack of report as FALSE, like RATs does.
dk[is.na(dk)] <- FALSE
ds[is.na(ds)] <- FALSE
hk[is.na(hk)] <- FALSE
hk[is.na(hs)] <- FALSE
# Structure.
return( data.table(tp= c(dk['tp'], ds['tp'], hk['tp'], hs['tp']),
fp= c(dk['fp'], ds['fp'], hk['fp'], hs['fp']),
fn= c(dk['fn'], ds['fn'], hk['fn'], hs['fn']),
tn= c(dk['tn'], ds['tn'], hk['tn'], hs['tn']),
fna= c(dk['fna'], ds['fna'], hk['fna'], hs['fna']),
tna= c(dk['tna'], ds['tna'], hk['tna'], hs['tna']),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Kallisto', 'Salmon', 'Kallisto', 'Salmon') ) )
} else if (tool=='DRIMSeq(g)') {
# Apply thresholds. any() is used to reduce a group of identical values to just one.
dk <- x[[1]][, adj_pvalue < 0.05] & x[[1]][, maxDprop >= fx]
ds <- x[[2]][, adj_pvalue < 0.05] & x[[2]][, maxDprop >= fx]
hk <- x[[3]][, adj_pvalue < 0.05] & x[[3]][, maxDprop >= fx]
hs <- x[[4]][, adj_pvalue < 0.05] & x[[4]][, maxDprop >= fx]
# Lack of report as FALSE, like RATs does.
dk[is.na(dk)] <- FALSE
ds[is.na(ds)] <- FALSE
hk[is.na(hk)] <- FALSE
hk[is.na(hs)] <- FALSE
# Structure.
return( data.table(tp= c(dim(x[[1]][dk & selected, TRUE])[1],
dim(x[[2]][ds & selected, TRUE])[1],
dim(x[[3]][hk & selected, TRUE])[1],
dim(x[[4]][hs & selected, TRUE])[1] ),
fp= c(dim(x[[1]][dk & !(selected | excluded), TRUE])[1],
dim(x[[2]][ds & !(selected | excluded), TRUE])[1],
dim(x[[3]][hk & !(selected | excluded), TRUE])[1],
dim(x[[4]][hs & !(selected | excluded), TRUE])[1] ),
fn= c(dim(x[[1]][(!dk) & selected, TRUE])[1],
dim(x[[2]][(!ds) & selected, TRUE])[1],
dim(x[[3]][(!hk) & selected, TRUE])[1],
dim(x[[4]][(!hs) & selected, TRUE])[1] ),
tn= c(dim(x[[1]][(!dk) & !(selected | excluded), TRUE])[1],
dim(x[[2]][(!ds) & !(selected | excluded), TRUE])[1],
dim(x[[3]][(!hk) & !(selected | excluded), TRUE])[1],
dim(x[[4]][(!hs) & !(selected | excluded), TRUE])[1] ),
fna= c(dim(x[[1]][dk & excluded, TRUE])[1],
dim(x[[2]][ds & excluded, TRUE])[1],
dim(x[[3]][hk & excluded, TRUE])[1],
dim(x[[4]][hs & excluded, TRUE])[1] ),
tna= c(dim(x[[1]][(!dk) & !excluded, TRUE])[1],
dim(x[[2]][(!ds) & !excluded, TRUE])[1],
dim(x[[3]][(!hk) & !excluded, TRUE])[1],
dim(x[[4]][(!hs) & !excluded, TRUE])[1] ),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Kallisto', 'Salmon', 'Kallisto', 'Salmon') ) )
} else if (tool=='DRIMSeq(t)') {
# Apply thresholds and aggregate by gene
tmp <- data.table(gene = x[[1]]$gene_id, sel = x[[1]]$selected, xcl = x[[1]]$excluded, thit = x[[1]]$Dprop >= fx & x[[1]]$adj_pvalue < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
dk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[2]]$gene_id, sel = x[[2]]$selected, xcl = x[[2]]$excluded, thit = x[[2]]$Dprop >= fx & x[[2]]$adj_pvalue < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
ds <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[3]]$gene_id, sel = x[[3]]$selected, xcl = x[[3]]$excluded, thit = x[[3]]$Dprop >= fx & x[[3]]$adj_pvalue < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
hk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[4]]$gene_id, sel = x[[4]]$selected, xcl = x[[4]]$excluded, thit = x[[4]]$Dprop >= fx & x[[4]]$adj_pvalue < 0.05, ghit = NA)
tmp[, ghit := any(thit), by= gene]
hs <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
# Lack of report as FALSE, like RATs does.
dk[is.na(dk)] <- FALSE
ds[is.na(ds)] <- FALSE
hk[is.na(hk)] <- FALSE
hk[is.na(hs)] <- FALSE
# Structure.
return( data.table(tp= c(dk['tp'], ds['tp'], hk['tp'], hs['tp']),
fp= c(dk['fp'], ds['fp'], hk['fp'], hs['fp']),
fn= c(dk['fn'], ds['fn'], hk['fn'], hs['fn']),
tn= c(dk['tn'], ds['tn'], hk['tn'], hs['tn']),
fna= c(dk['fna'], ds['fna'], hk['fna'], hs['fna']),
tna= c(dk['tna'], ds['tna'], hk['tna'], hs['tna']),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Kallisto', 'Salmon', 'Kallisto', 'Salmon') ) )
} else if (tool=='DEXSeq') {
# Apply thresholds and aggregate by gene
tmp <- data.table(gene = x[[1]]$geneID, sel = x[[1]]$selected, xcl = x[[1]]$excluded, thit = x[[1]]$padj < 0.05 & x[[1]]$Dprop >= fx, ghit = NA)
tmp[is.na(thit), thit := FALSE] # Ineligible genes are NA, but we'll count them as FALSE, just like RATs does.
tmp[, ghit := any(thit), by= gene]
dk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[2]]$geneID, sel = x[[2]]$selected, xcl = x[[2]]$excluded, thit = x[[2]]$transcript < 0.05 & x[[2]]$Dprop >= fx, ghit = NA)
tmp[is.na(thit), thit := FALSE]
tmp[, ghit := any(thit), by= gene]
ds <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[3]]$geneID, sel = x[[3]]$selected, xcl = x[[3]]$excluded, thit = x[[3]]$padj < 0.05 & x[[3]]$Dprop >= fx, ghit = NA)
tmp[is.na(thit), thit := FALSE]
tmp[, ghit := any(thit), by= gene]
hk <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
tmp <- data.table(gene = x[[4]]$geneID, sel = x[[4]]$selected, xcl = x[[4]]$excluded, thit = x[[4]]$transcript < 0.05 & x[[4]]$Dprop >= fx, ghit = NA)
tmp[is.na(thit), thit := FALSE]
tmp[, ghit := any(thit), by= gene]
hs <- c( tp = length(tmp[ghit & sel, TRUE, by=gene]$V1),
fp = length(tmp[ghit & !(sel | xcl), TRUE, by=gene]$V1),
fn = length(tmp[(!ghit) & sel, TRUE, by=gene]$V1),
tn = length(tmp[(!ghit) & !(sel | xcl), TRUE, by=gene]$V1),
fna = length(tmp[(ghit) & xcl, TRUE, by=gene]$V1),
tna = length(tmp[(!ghit) & xcl, TRUE, by=gene]$V1) )
# Lack of report as FALSE, like RATs does.
dk[is.na(dk)] <- FALSE
ds[is.na(ds)] <- FALSE
hk[is.na(hk)] <- FALSE
hk[is.na(hs)] <- FALSE
# Structure.
return( data.table(tp= c(dk['tp'], ds['tp'], hk['tp'], hs['tp']),
fp= c(dk['fp'], ds['fp'], hk['fp'], hs['fp']),
fn= c(dk['fn'], ds['fn'], hk['fn'], hs['fn']),
tn= c(dk['tn'], ds['tn'], hk['tn'], hs['tn']),
fna= c(dk['fna'], ds['fna'], hk['fna'], hs['fna']),
tna= c(dk['tna'], ds['tna'], hk['tna'], hs['tna']),
set= c('Fruitfly', 'Fruitfly', 'Human', 'Human'),
quant= c('Salmon', 'Salmon', 'Salmon', 'Salmon') ) )
} else {
stop('Tool?')
}
}
# Calculate performance metrics.
# x: List in order fly/kallisto, fly/salmon, human/kallisto, human/salmon
measureup <- function(x, tool, fx, qt=0.95, rt=0.85, pre=0) {
df <- countup(x, tool, fx, qt, rt)
dt <- data.table(value= c(round(df[, tp/(tp+fn)], digits=3),
round(df[, fp/(tp+fp)], digits=3),
round(df[, as.double(tp*tn - fp*fn) / sqrt( as.double(tp + fp)*as.double(tp + fn)*as.double(tn + fp)*as.double(tn + fn) )], digits=3)),
type= factor(c(rep('Sensitivity', 4), rep('FDR', 4), rep('MCC', 4)), levels=c('Sensitivity', 'FDR', 'MCC')),
tool= factor(rep(tool, 12), levels=c('DRIMSeq(g)', 'DRIMSeq(t)', 'RATs(g)', 'RATs(t)', 'SUPPA2', 'DEXSeq')),
species=rep(df$set, 3),
quantification=rep(df$quant, 3),
effect=rep(paste('D >= ', fx), 12) ,
reproducibility=factor(rep(paste('Qrep', ifelse(is.na(qt), '', '>='), qt, 'Rrep', ifelse(is.na(rt), '', '>='), rt), 12)),
prefilter=pre)
if (tool=='DEXSeq')
dt[, tool := rep(c('DEXSeq', 'DEXsgr'), 6)]
return(dt)
}
RATs
# RATs Dprop >= 0.20
rgd <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=0)
rtd <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgd1 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=0)
rtd1 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=0)
rgd2 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=0)
rtd2 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=0)
rgd3 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=0)
rtd3 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.10
rgf <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=0)
rtf <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgf1 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=0)
rtf1 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=0)
rgf2 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=0)
rtf2 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=0)
rgf3 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=0)
rtf3 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.05
rgff <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=0)
rtff <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgff1 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=0)
rtff1 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=0)
rgff2 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=0)
rtff2 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=0)
rgff3 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=0)
rtff3 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.20
rgd10 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=10)
rtd10 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgd110 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=10)
rtd110 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=10)
rgd210 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=10)
rtd210 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=10)
rgd310 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=10)
rtd310 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.10
rgf10 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=10)
rtf10 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgf110 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=10)
rtf110 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=10)
rgf210 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=10)
rtf210 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=10)
rgf310 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=10)
rtf310 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.05
rgff10 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=10)
rtff10 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgff110 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=10)
rtff110 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=10)
rgff210 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=10)
rtff210 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=10)
rgff310 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=10)
rtff310 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.20
rgd50 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=50)
rtd50 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgd150 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=50)
rtd150 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=50)
rgd250 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=50)
rtd250 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=50)
rgd350 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=50)
rtd350 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=50)
# RATs Dprop >= 0.10
rgf50 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=50)
rtf50 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgf150 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=50)
rtf150 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=50)
rgf250 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=50)
rtf250 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=50)
rgf350 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=50)
rtf350 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=50)
# RATs Dprop >= 0.05
rgff50 <- measureup(list(rdkth250,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=50)
rtff50 <- measureup(list(rdkth250,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgff150 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=50)
rtff150 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=50)
rgff250 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=50)
rtff250 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=50)
rgff350 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=50)
rtff350 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=50)
SUPPA2
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 dPSI >= 0.10
sf <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 dPSI >= 0.05
sff <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 dPSI >= 0.10
sf10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 dPSI >= 0.05
sff10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# SUPPA2 dPSI >= 0.10
sf50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# SUPPA2 dPSI >= 0.05
sff50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
DRIMSeq
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.10
df <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.05
dff <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# Transcript-level
# Dprop >= 0.20
ddt <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.10
dft <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.05
dfft <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.10
df10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.05
dff10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# Transcript-level
# Dprop >= 0.20
ddt10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.10
dft10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.05
dfft10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.10
df50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.05
dff50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
# Transcript-level
# Dprop >= 0.20
ddt50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.10
dft50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.05
dfft50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
DEXSeq
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50 )
Merge results into single table.
# Easier to work with ggplot2 if everything is in one table.
mega <- rbindlist(list(rgd, rtd, rgd1, rtd1, rgd2, rtd2, rgd3, rtd3,
rgf, rtf, rgf1, rtf1, rgf2, rtf2, rgf3, rtf3,
rgff, rtff, rgff1, rtff1, rgff2, rtff2, rgff3, rtff3,
sd, sf, sff, dd, df, dff, ddt, dft, dfft, xd, xf, xff,
rgd10, rtd10, rgd110, rtd110, rgd210, rtd210, rgd310, rtd310,
rgf10, rtf10, rgf110, rtf110, rgf210, rtf210, rgf310, rtf310,
rgff10, rtff10, rgff110, rtff110, rgff210, rtff210, rgff310, rtff310,
sd10, sf10, sff10, dd10, df10, dff10, ddt10, dft10, dfft10, xd10, xf10, xff10,
rgd50, rtd50, rgd150, rtd150, rgd250, rtd250, rgd350, rtd350,
rgf50, rtf50, rgf150, rtf150, rgf250, rtf250, rgf350, rtf350,
rgff50, rtff50, rgff150, rtff150, rgff250, rtff250, rgff350, rtff350,
sd50, sf50, sff50, dd50, df50, dff50, ddt50, dft50, dfft50, xd50, xf50, xff50))
Plot.
# Reduce the number of points to include in the MS figure.
ms <- mega[! mega$reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65'), ]
ms <- ms[quantification == 'Salmon', ]
ms <- ms[prefilter == 0 | prefilter == 50]
ms <- ms[tool != 'DEXsgr'] # stageR is a post-processing that could be applied to any tool. For fairness and simplicity, show only vanilla DEXSeq.
ms$prefilter <- as.character(ms$prefilter)
pdf('analysis_sim_v3_perfHS.pdf')
print( ggplot(ms[species=='Human', ]) + gth3 +
facet_grid(type ~ tool, switch='y') +
geom_jitter(aes(x=effect, y=value, colour=reproducibility, shape=prefilter), width=0.1) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
coord_cartesian(ylim=c(0, 1)) +
scale_shape_manual(values=c(21, 4)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'black',
'Qrep >= 0.95 Rrep >= 0.85' = 'red',
'Qrep >= 0.8 Rrep >= 0.55' = 'blue')) +
labs(title='simulated human data', y='') +
guides(shape=guide_legend(nrow=2), colour=guide_legend(nrow=3)) +
theme(legend.position = "bottom") )
dev.off()
## quartz_off_screen
## 2
pdf('analysis_sim_v3_perfDS.pdf')
print( ggplot(ms[species=='Fruitfly', ]) + gth3 +
facet_grid(type ~ tool, switch='y') +
geom_jitter(aes(x=effect, y=value, colour=reproducibility, shape=prefilter), alpha=0.7, width=0.1) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
coord_cartesian(ylim=c(0, 1)) +
scale_shape_manual(values=c(21, 4)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'black',
'Qrep >= 0.95 Rrep >= 0.85' = 'red',
'Qrep >= 0.8 Rrep >= 0.55' = 'blue')) +
labs(title='simulated fruitfly data', y='') +
guides(shape=guide_legend(nrow=2), colour=guide_legend(nrow=3)) +
theme(legend.position = "bottom") )
dev.off()
## quartz_off_screen
## 2
# Human set quantified with Salmon.
hs <- ggplot(mega[quantification=='Salmon' & species=='Human', ]) + gth3 +
facet_grid(type ~ tool, switch='y') +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name="")) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated human dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perfHS.gif", hs)
# Fly set quantified with Salmon.
ds <- ggplot(mega[quantification=='Salmon' & species=='Fruitfly', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name="")) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated fruitfly dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perfDS.gif", ds)
# Human set quantified with Kallisto.
hk <- ggplot(mega[quantification=='Kallisto' & species=='Human', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name="")) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated human dataset quantified with Kallisto)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perfHK.gif", hk)
# Fruitfly set quantified with Kallisto.
dk <- ggplot(mega[quantification=='Kallisto' & species=='Fruitfly', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name="")) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated fruitfly dataset quantified with Kallisto)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perfDK.gif", dk)
It is not easy to compare the points in the plots above, as many are quite close. In the tables below, we rank the tools, for each combination of prefilter and postfilter values.
For simpliciy, for RATs we are using the values from the strictest (ie. default) reproducibility thresholds when ranking (darkest red points in the plots). We are also limiting the ranking to the Salmon quantification, as Salmon and Kallisto resulted in practically indistinguishable preformance results.
As the ranks change easily with changes in the parameters, we use the sum of the ranks across the parameter combinations as an indication of which tool ranks high most consistently.
Without prefilter
tallyh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
sensh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
fdrh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
mcch = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 0)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.612 D >= 0.2 Sensitivity
## 2 DEXSeq 0.570 D >= 0.2 Sensitivity
## 3 RATs(g) 0.563 D >= 0.2 Sensitivity
## 4 RATs(t) 0.549 D >= 0.2 Sensitivity
## 5 DRIMSeq(t) 0.513 D >= 0.2 Sensitivity
## 6 DRIMSeq(g) 0.505 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.021 D >= 0.2 FDR
## 2 DRIMSeq(t) 0.027 D >= 0.2 FDR
## 3 DEXSeq 0.032 D >= 0.2 FDR
## 4 RATs(g) 0.039 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.062 D >= 0.2 FDR
## 6 SUPPA2 0.329 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.734 D >= 0.2 MCC
## 2 RATs(g) 0.726 D >= 0.2 MCC
## 3 RATs(t) 0.724 D >= 0.2 MCC
## 4 DRIMSeq(t) 0.691 D >= 0.2 MCC
## 5 DRIMSeq(g) 0.672 D >= 0.2 MCC
## 6 SUPPA2 0.623 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.788 D >= 0.1 Sensitivity
## 2 RATs(g) 0.716 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DEXSeq 0.599 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.558 D >= 0.1 Sensitivity
## 6 DRIMSeq(g) 0.555 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.061 D >= 0.1 FDR
## 2 DEXSeq 0.070 D >= 0.1 FDR
## 3 RATs(t) 0.071 D >= 0.1 FDR
## 4 DRIMSeq(g) 0.114 D >= 0.1 FDR
## 5 RATs(g) 0.282 D >= 0.1 FDR
## 6 SUPPA2 0.509 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.793 D >= 0.1 MCC
## 2 DEXSeq 0.737 D >= 0.1 MCC
## 3 DRIMSeq(t) 0.708 D >= 0.1 MCC
## 4 RATs(g) 0.703 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.683 D >= 0.1 MCC
## 6 SUPPA2 0.598 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.871 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.777 D >= 0.05 Sensitivity
## 4 DEXSeq 0.602 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.574 D >= 0.05 Sensitivity
## 6 DRIMSeq(g) 0.566 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.103 D >= 0.05 FDR
## 2 DEXSeq 0.119 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.170 D >= 0.05 FDR
## 4 RATs(t) 0.306 D >= 0.05 FDR
## 5 SUPPA2 0.645 D >= 0.05 FDR
## 6 RATs(g) 0.670 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.720 D >= 0.05 MCC
## 2 DEXSeq 0.718 D >= 0.05 MCC
## 3 DRIMSeq(t) 0.699 D >= 0.05 MCC
## 4 DRIMSeq(g) 0.664 D >= 0.05 MCC
## 5 SUPPA2 0.524 D >= 0.05 MCC
## 6 RATs(g) 0.499 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Prefilter > 10
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 10)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.608 D >= 0.2 Sensitivity
## 2 DRIMSeq(g) 0.596 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.589 D >= 0.2 Sensitivity
## 4 DEXSeq 0.567 D >= 0.2 Sensitivity
## 5 RATs(g) 0.563 D >= 0.2 Sensitivity
## 6 RATs(t) 0.549 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.014 D >= 0.2 FDR
## 2 RATs(g) 0.016 D >= 0.2 FDR
## 3 DEXSeq 0.027 D >= 0.2 FDR
## 4 SUPPA2 0.046 D >= 0.2 FDR
## 5 DRIMSeq(t) 0.050 D >= 0.2 FDR
## 6 DRIMSeq(g) 0.071 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.750 D >= 0.2 MCC
## 2 RATs(g) 0.736 D >= 0.2 MCC
## 3 DEXSeq 0.734 D >= 0.2 MCC
## 4 DRIMSeq(t) 0.729 D >= 0.2 MCC
## 5 RATs(t) 0.727 D >= 0.2 MCC
## 6 DRIMSeq(g) 0.725 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.787 D >= 0.1 Sensitivity
## 2 RATs(g) 0.716 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DRIMSeq(g) 0.651 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.641 D >= 0.1 Sensitivity
## 6 DEXSeq 0.599 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.058 D >= 0.1 FDR
## 2 RATs(t) 0.061 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.109 D >= 0.1 FDR
## 4 DRIMSeq(g) 0.177 D >= 0.1 FDR
## 5 RATs(g) 0.211 D >= 0.1 FDR
## 6 SUPPA2 0.255 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.798 D >= 0.1 MCC
## 2 SUPPA2 0.750 D >= 0.1 MCC
## 3 DEXSeq 0.742 D >= 0.1 MCC
## 4 RATs(g) 0.739 D >= 0.1 MCC
## 5 DRIMSeq(t) 0.735 D >= 0.1 MCC
## 6 DRIMSeq(g) 0.707 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.878 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.778 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.656 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.649 D >= 0.05 Sensitivity
## 6 DEXSeq 0.601 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.106 D >= 0.05 FDR
## 2 DRIMSeq(t) 0.170 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.266 D >= 0.05 FDR
## 4 RATs(t) 0.303 D >= 0.05 FDR
## 5 SUPPA2 0.561 D >= 0.05 FDR
## 6 RATs(g) 0.655 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.723 D >= 0.05 MCC
## 2 RATs(t) 0.722 D >= 0.05 MCC
## 3 DRIMSeq(t) 0.710 D >= 0.05 MCC
## 4 DRIMSeq(g) 0.663 D >= 0.05 MCC
## 5 SUPPA2 0.589 D >= 0.05 MCC
## 6 RATs(g) 0.513 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Prefilter > 50
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 50)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.632 D >= 0.2 Sensitivity
## 2 DRIMSeq(t) 0.616 D >= 0.2 Sensitivity
## 3 SUPPA2 0.600 D >= 0.2 Sensitivity
## 4 RATs(g) 0.562 D >= 0.2 Sensitivity
## 5 RATs(t) 0.549 D >= 0.2 Sensitivity
## 6 DEXSeq 0.523 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.004 D >= 0.2 FDR
## 2 RATs(t) 0.004 D >= 0.2 FDR
## 3 DRIMSeq(t) 0.033 D >= 0.2 FDR
## 4 DRIMSeq(g) 0.046 D >= 0.2 FDR
## 5 SUPPA2 0.053 D >= 0.2 FDR
## 6 DEXSeq 0.053 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.754 D >= 0.2 MCC
## 2 DRIMSeq(t) 0.749 D >= 0.2 MCC
## 3 SUPPA2 0.748 D >= 0.2 MCC
## 4 RATs(g) 0.740 D >= 0.2 MCC
## 5 RATs(t) 0.731 D >= 0.2 MCC
## 6 DEXSeq 0.694 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.776 D >= 0.1 Sensitivity
## 2 RATs(g) 0.715 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DRIMSeq(g) 0.678 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.665 D >= 0.1 Sensitivity
## 6 DEXSeq 0.552 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.036 D >= 0.1 FDR
## 2 RATs(g) 0.073 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.081 D >= 0.1 FDR
## 4 DEXSeq 0.089 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.126 D >= 0.1 FDR
## 6 SUPPA2 0.149 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.809 D >= 0.1 MCC
## 2 SUPPA2 0.807 D >= 0.1 MCC
## 3 RATs(g) 0.806 D >= 0.1 MCC
## 4 DRIMSeq(t) 0.758 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.743 D >= 0.1 MCC
## 6 DEXSeq 0.698 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.902 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.777 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.683 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.671 D >= 0.05 Sensitivity
## 6 DEXSeq 0.555 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.122 D >= 0.05 FDR
## 2 DEXSeq 0.146 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.178 D >= 0.05 FDR
## 4 RATs(t) 0.306 D >= 0.05 FDR
## 5 SUPPA2 0.562 D >= 0.05 FDR
## 6 RATs(g) 0.670 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.740 D >= 0.05 MCC
## 2 RATs(t) 0.720 D >= 0.05 MCC
## 3 DRIMSeq(g) 0.718 D >= 0.05 MCC
## 4 DEXSeq 0.676 D >= 0.05 MCC
## 5 SUPPA2 0.613 D >= 0.05 MCC
## 6 RATs(g) 0.499 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Cumulative rank
print( data.frame(Overall = tallyh,
Sensitivity = sensh,
FDR = fdrh,
MCC = mcch) )
## Overall Sensitivity FDR MCC
## RATs(t) 76 33 22 21
## RATs(g) 98 24 37 37
## DRIMSeq(t) 90 40 21 29
## DRIMSeq(g) 113 37 37 39
## SUPPA2 94 11 48 35
## DEXSeq 96 44 24 28
Although the tool occupying the top rank changes easily with the filtering parameters of the comparison, a crude sum of the ranks from the previous tables across the comparisons shows that overall the transcript-level method in RATs consistently ranks higher than the other tools, even when it is not in the top spot.
Without prefilter
tallyd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
sensd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
fdrd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
mccd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 0)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.615 D >= 0.2 Sensitivity
## 2 DRIMSeq(g) 0.601 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.595 D >= 0.2 Sensitivity
## 4 DEXSeq 0.577 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.009 D >= 0.2 FDR
## 2 RATs(g) 0.015 D >= 0.2 FDR
## 3 DEXSeq 0.015 D >= 0.2 FDR
## 4 DRIMSeq(t) 0.029 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.036 D >= 0.2 FDR
## 6 SUPPA2 0.128 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.741 D >= 0.2 MCC
## 2 DRIMSeq(g) 0.721 D >= 0.2 MCC
## 3 DRIMSeq(t) 0.721 D >= 0.2 MCC
## 4 SUPPA2 0.716 D >= 0.2 MCC
## 5 RATs(g) 0.713 D >= 0.2 MCC
## 6 RATs(t) 0.709 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.765 D >= 0.1 Sensitivity
## 2 RATs(g) 0.666 D >= 0.1 Sensitivity
## 3 RATs(t) 0.653 D >= 0.1 Sensitivity
## 4 DRIMSeq(t) 0.649 D >= 0.1 Sensitivity
## 5 DRIMSeq(g) 0.640 D >= 0.1 Sensitivity
## 6 DEXSeq 0.616 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.024 D >= 0.1 FDR
## 2 DEXSeq 0.033 D >= 0.1 FDR
## 3 RATs(g) 0.043 D >= 0.1 FDR
## 4 DRIMSeq(t) 0.056 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.071 D >= 0.1 FDR
## 6 SUPPA2 0.223 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.787 D >= 0.1 MCC
## 2 RATs(g) 0.786 D >= 0.1 MCC
## 3 DEXSeq 0.759 D >= 0.1 MCC
## 4 SUPPA2 0.753 D >= 0.1 MCC
## 5 DRIMSeq(t) 0.743 D >= 0.1 MCC
## 6 DRIMSeq(g) 0.729 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.801 D >= 0.05 Sensitivity
## 2 SUPPA2 0.786 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(t) 0.652 D >= 0.05 Sensitivity
## 5 DRIMSeq(g) 0.642 D >= 0.05 Sensitivity
## 6 DEXSeq 0.621 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.052 D >= 0.05 FDR
## 2 DRIMSeq(t) 0.072 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.099 D >= 0.05 FDR
## 4 RATs(t) 0.118 D >= 0.05 FDR
## 5 RATs(g) 0.222 D >= 0.05 FDR
## 6 SUPPA2 0.248 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.803 D >= 0.05 MCC
## 2 RATs(g) 0.773 D >= 0.05 MCC
## 3 DEXSeq 0.754 D >= 0.05 MCC
## 4 SUPPA2 0.750 D >= 0.05 MCC
## 5 DRIMSeq(t) 0.736 D >= 0.05 MCC
## 6 DRIMSeq(g) 0.715 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Prefilter > 10
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 10)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.635 D >= 0.2 Sensitivity
## 2 SUPPA2 0.618 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.614 D >= 0.2 Sensitivity
## 4 DEXSeq 0.588 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.008 D >= 0.2 FDR
## 2 RATs(g) 0.009 D >= 0.2 FDR
## 3 DEXSeq 0.018 D >= 0.2 FDR
## 4 SUPPA2 0.026 D >= 0.2 FDR
## 5 DRIMSeq(t) 0.029 D >= 0.2 FDR
## 6 DRIMSeq(g) 0.033 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.759 D >= 0.2 MCC
## 2 DEXSeq 0.747 D >= 0.2 MCC
## 3 DRIMSeq(g) 0.740 D >= 0.2 MCC
## 4 DRIMSeq(t) 0.727 D >= 0.2 MCC
## 5 RATs(g) 0.715 D >= 0.2 MCC
## 6 RATs(t) 0.709 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.772 D >= 0.1 Sensitivity
## 2 DRIMSeq(g) 0.683 D >= 0.1 Sensitivity
## 3 DRIMSeq(t) 0.682 D >= 0.1 Sensitivity
## 4 RATs(g) 0.666 D >= 0.1 Sensitivity
## 5 DEXSeq 0.663 D >= 0.1 Sensitivity
## 6 RATs(t) 0.653 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.021 D >= 0.1 FDR
## 2 RATs(g) 0.036 D >= 0.1 FDR
## 3 DEXSeq 0.058 D >= 0.1 FDR
## 4 DRIMSeq(t) 0.066 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.073 D >= 0.1 FDR
## 6 SUPPA2 0.136 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.798 D >= 0.1 MCC
## 2 RATs(g) 0.789 D >= 0.1 MCC
## 3 RATs(t) 0.788 D >= 0.1 MCC
## 4 DEXSeq 0.778 D >= 0.1 MCC
## 5 DRIMSeq(t) 0.753 D >= 0.1 MCC
## 6 DRIMSeq(g) 0.749 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.801 D >= 0.05 Sensitivity
## 2 SUPPA2 0.795 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.685 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.684 D >= 0.05 Sensitivity
## 6 DEXSeq 0.670 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.086 D >= 0.05 FDR
## 2 DEXSeq 0.088 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.098 D >= 0.05 FDR
## 4 RATs(t) 0.116 D >= 0.05 FDR
## 5 SUPPA2 0.171 D >= 0.05 FDR
## 6 RATs(g) 0.218 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.804 D >= 0.05 MCC
## 2 SUPPA2 0.792 D >= 0.05 MCC
## 3 RATs(g) 0.775 D >= 0.05 MCC
## 4 DEXSeq 0.768 D >= 0.05 MCC
## 5 DRIMSeq(t) 0.742 D >= 0.05 MCC
## 6 DRIMSeq(g) 0.736 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Prefilter > 50
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 50)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.647 D >= 0.2 Sensitivity
## 2 DRIMSeq(t) 0.631 D >= 0.2 Sensitivity
## 3 SUPPA2 0.628 D >= 0.2 Sensitivity
## 4 DEXSeq 0.556 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.007 D >= 0.2 FDR
## 2 RATs(t) 0.008 D >= 0.2 FDR
## 3 SUPPA2 0.012 D >= 0.2 FDR
## 4 DRIMSeq(t) 0.024 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.026 D >= 0.2 FDR
## 6 DEXSeq 0.030 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.773 D >= 0.2 MCC
## 2 DRIMSeq(g) 0.738 D >= 0.2 MCC
## 3 DRIMSeq(t) 0.728 D >= 0.2 MCC
## 4 DEXSeq 0.721 D >= 0.2 MCC
## 5 RATs(g) 0.716 D >= 0.2 MCC
## 6 RATs(t) 0.709 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.781 D >= 0.1 Sensitivity
## 2 DRIMSeq(g) 0.703 D >= 0.1 Sensitivity
## 3 DRIMSeq(t) 0.700 D >= 0.1 Sensitivity
## 4 RATs(g) 0.666 D >= 0.1 Sensitivity
## 5 RATs(t) 0.653 D >= 0.1 Sensitivity
## 6 DEXSeq 0.612 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.017 D >= 0.1 FDR
## 2 RATs(g) 0.023 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.054 D >= 0.1 FDR
## 4 DEXSeq 0.058 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.061 D >= 0.1 FDR
## 6 SUPPA2 0.070 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.839 D >= 0.1 MCC
## 2 RATs(g) 0.795 D >= 0.1 MCC
## 3 RATs(t) 0.790 D >= 0.1 MCC
## 4 DRIMSeq(t) 0.757 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.756 D >= 0.1 MCC
## 6 DEXSeq 0.745 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.817 D >= 0.05 Sensitivity
## 2 RATs(g) 0.801 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.704 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.702 D >= 0.05 Sensitivity
## 6 DEXSeq 0.616 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.063 D >= 0.05 FDR
## 2 DRIMSeq(g) 0.076 D >= 0.05 FDR
## 3 DEXSeq 0.083 D >= 0.05 FDR
## 4 RATs(t) 0.118 D >= 0.05 FDR
## 5 SUPPA2 0.149 D >= 0.05 FDR
## 6 RATs(g) 0.222 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.818 D >= 0.05 MCC
## 2 RATs(t) 0.803 D >= 0.05 MCC
## 3 RATs(g) 0.773 D >= 0.05 MCC
## 4 DRIMSeq(t) 0.753 D >= 0.05 MCC
## 5 DRIMSeq(g) 0.746 D >= 0.05 MCC
## 6 DEXSeq 0.737 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Cumulative rank
print( data.frame(Overall = tallyd,
Sensitivity = sensd,
FDR = fdrd,
MCC = mccd) )
## Overall Sensitivity FDR MCC
## RATs(t) 89 41 19 29
## RATs(g) 87 29 29 29
## DRIMSeq(t) 98 32 28 38
## DRIMSeq(g) 106 26 39 41
## SUPPA2 80 14 47 19
## DEXSeq 107 47 27 33
tmp <- data.frame(Cumulative_Rank= c(tallyh + tallyd, sensh + sensd, fdrh + fdrd, mcch + mccd),
Type=factor(rep(c('Overall', 'Sensitivity', 'FDR', 'MCC'), each=6), levels=c('Overall', 'Sensitivity', 'FDR', 'MCC')),
Tool=c(names(tallyh), names(sensh), names(fdrh), names(mcch)) )
p <-ggplot(tmp, aes(x=Tool, fill=Type, y=Cumulative_Rank)) + gth +
geom_bar(stat='identity', position='dodge') +
scale_y_reverse() +
scale_x_discrete(position = "top") +
scale_fill_manual(values=c(Overall='grey70', Sensitivity='orange', FDR='red', MCC='blue'))
print(p)
pdf('cumrank.pdf')
print(p)
dev.off()
## quartz_off_screen
## 2
The simulated sets were focused on coding genes only. Non-coding genes would be expected to appear as non-expressed and are added to the FP and TN.
Redefine the calculations of TP,FP,etc to include non-coding genes as expected Negatives.
# Redefine the metrics function
measureup <- function(x, tool, fx, qt=0.95, rt=0.85, pre=0) {
df <- countup(x, tool, fx, qt, rt)
dt <- data.table(value= c(round(df[, tp/(tp+fn)], digits=3),
round(df[, (fp+fna)/(tp+fp+fna)], digits=3),
round(df[, as.double(tp*(tn+tna) - (fp+fna)*fn) / sqrt( as.double(tp + fp+fna)*as.double(tp + fn)*as.double(tn+tna + fp+fna)*as.double(tn+tna + fn) )], digits=3)),
type= factor(c(rep('Sensitivity', 4), rep('FDR', 4), rep('MCC', 4)), levels=c('Sensitivity', 'FDR', 'MCC')),
tool= factor(rep(tool, 12), levels=c('DRIMSeq(g)', 'DRIMSeq(t)', 'RATs(g)', 'RATs(t)', 'SUPPA2', 'DEXSeq')),
species=rep(df$set, 3),
quantification=rep(df$quant, 3),
effect=rep(paste('D >= ', fx), 12) ,
reproducibility=factor(rep(paste('Qrep', ifelse(is.na(qt), '', '>='), qt, 'Rrep', ifelse(is.na(rt), '', '>='), rt), 12)),
prefilter=pre)
if (tool=='DEXSeq')
dt[, tool := rep(c('DEXSeq', 'DEXsgr'), 6)]
return(dt)
}
RATs
# RATs Dprop >= 0.20
rgd <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=0)
rtd <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgd1 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=0)
rtd1 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=0)
rgd2 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=0)
rtd2 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=0)
rgd3 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=0)
rtd3 <- measureup(list(rdk,rds,rhk,rhs), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.10
rgf <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=0)
rtf <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgf1 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=0)
rtf1 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=0)
rgf2 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=0)
rtf2 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=0)
rgf3 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=0)
rtf3 <- measureup(list(rdkth,rdsth,rhkth,rhsth), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.05
rgff <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=0)
rtff <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=0)
# more permissive reproducibility
rgff1 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=0)
rtff1 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=0)
rgff2 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=0)
rtff2 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=0)
rgff3 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=0)
rtff3 <- measureup(list(rdkth2,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=0)
# RATs Dprop >= 0.20
rgd10 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=10)
rtd10 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgd110 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=10)
rtd110 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=10)
rgd210 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=10)
rtd210 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=10)
rgd310 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=10)
rtd310 <- measureup(list(rdk10,rds10,rhk10,rhs10), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.10
rgf10 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=10)
rtf10 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgf110 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=10)
rtf110 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=10)
rgf210 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=10)
rtf210 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=10)
rgf310 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=10)
rtf310 <- measureup(list(rdkth10,rdsth10,rhkth10,rhsth10), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.05
rgff10 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=10)
rtff10 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=10)
# more permissive reproducibility
rgff110 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=10)
rtff110 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=10)
rgff210 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=10)
rtff210 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=10)
rgff310 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=10)
rtff310 <- measureup(list(rdkth210,rdsth210,rhkth210,rhsth210), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=10)
# RATs Dprop >= 0.20
rgd50 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.95, rt=0.85, pre=50)
rtd50 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgd150 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.90, rt=0.75, pre=50)
rtd150 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.90, rt=0.75, pre=50)
rgd250 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.85, rt=0.65, pre=50)
rtd250 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.85, rt=0.65, pre=50)
rgd350 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(g)', fx=0.20, qt=0.80, rt=0.55, pre=50)
rtd350 <- measureup(list(rdk50,rds50,rhk50,rhs50), 'RATs(t)', fx=0.20, qt=0.80, rt=0.55, pre=50)
# RATs Dprop >= 0.10
rgf50 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.95, rt=0.85, pre=50)
rtf50 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgf150 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.90, rt=0.75, pre=50)
rtf150 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.90, rt=0.75, pre=50)
rgf250 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.85, rt=0.65, pre=50)
rtf250 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.85, rt=0.65, pre=50)
rgf350 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(g)', fx=0.10, qt=0.80, rt=0.55, pre=50)
rtf350 <- measureup(list(rdkth50,rdsth50,rhkth50,rhsth50), 'RATs(t)', fx=0.10, qt=0.80, rt=0.55, pre=50)
# RATs Dprop >= 0.05
rgff50 <- measureup(list(rdkth250,rdsth2,rhkth2,rhsth2), 'RATs(g)', fx=0.05, qt=0.95, rt=0.85, pre=50)
rtff50 <- measureup(list(rdkth250,rdsth2,rhkth2,rhsth2), 'RATs(t)', fx=0.05, qt=0.95, rt=0.85, pre=50)
# more permissive reproducibility
rgff150 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.90, rt=0.75, pre=50)
rtff150 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.90, rt=0.75, pre=50)
rgff250 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.85, rt=0.65, pre=50)
rtff250 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.85, rt=0.65, pre=50)
rgff350 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(g)', fx=0.05, qt=0.80, rt=0.55, pre=50)
rtff350 <- measureup(list(rdkth250,rdsth250,rhkth250,rhsth250), 'RATs(t)', fx=0.05, qt=0.80, rt=0.55, pre=50)
SUPPA2
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 dPSI >= 0.10
sf <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 dPSI >= 0.05
sff <- measureup(list(sdk,sds,shk,shs), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 dPSI >= 0.10
sf10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 dPSI >= 0.05
sff10 <- measureup(list(sdk10,sds10,shk10,shs10), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# SUPPA2 does not bootstrap the reproducibility.
# SUPPA2 dPSI >= 0.20
sd50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# SUPPA2 dPSI >= 0.10
sf50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# SUPPA2 dPSI >= 0.05
sff50 <- measureup(list(sdk50,sds50,shk50,shs50), 'SUPPA2', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
DRIMSeq
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.10
df <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.05
dff <- measureup(list(ddk,dds,dhk,dhs), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# Transcript-level
# Dprop >= 0.20
ddt <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.10
dft <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0)
# Dprop >= 0.05
dfft <- measureup(list(ddkt,ddst,dhkt,dhst), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0)
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.10
df10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.05
dff10 <- measureup(list(ddk10,dds10,dhk10,dhs10), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# Transcript-level
# Dprop >= 0.20
ddt10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.10
dft10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10)
# Dprop >= 0.05
dfft10 <- measureup(list(ddkt10,ddst10,dhkt10,dhst10), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10)
# DRIMSeq does not bootstrap the reproducibility.
# Gene-level
# Dprop >= 0.20
dd50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.10
df50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.05
dff50 <- measureup(list(ddk50,dds50,dhk50,dhs50), 'DRIMSeq(g)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
# Transcript-level
# Dprop >= 0.20
ddt50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.10
dft50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50)
# Dprop >= 0.05
dfft50 <- measureup(list(ddkt50,ddst50,dhkt50,dhst50), 'DRIMSeq(t)', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50)
DEXSeq
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff <- measureup(list(xds, xsds, xhs, xshs), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=0 )
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff10 <- measureup(list(xds10, xsds10, xhs10, xshs10), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=10 )
# DEXSeq does not bootstrap the reproducibility.
# DEXSeq with and without stageR corrections. Dprop >= 0.20
xd50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.20, qt=NA_real_, rt=NA_real_, pre=50 )
# DEXSeq with and without stageR corrections. Dprop >= 0.10
xf50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.10, qt=NA_real_, rt=NA_real_, pre=50 )
# DEXSeq with and without stageR corrections. Dprop >= 0.05
xff50 <- measureup(list(xds50, xsds50, xhs50, xshs50), 'DEXSeq', fx=0.05, qt=NA_real_, rt=NA_real_, pre=50 )
# Easier to work with ggplot2 if everything is in one table.
mega <- rbindlist(list(rgd, rtd, rgd1, rtd1, rgd2, rtd2, rgd3, rtd3,
rgf, rtf, rgf1, rtf1, rgf2, rtf2, rgf3, rtf3,
rgff, rtff, rgff1, rtff1, rgff2, rtff2, rgff3, rtff3,
sd, sf, sff, dd, df, dff, ddt, dft, dfft, xd, xf, xff,
rgd10, rtd10, rgd110, rtd110, rgd210, rtd210, rgd310, rtd310,
rgf10, rtf10, rgf110, rtf110, rgf210, rtf210, rgf310, rtf310,
rgff10, rtff10, rgff110, rtff110, rgff210, rtff210, rgff310, rtff310,
sd10, sf10, sff10, dd10, df10, dff10, ddt10, dft10, dfft10, xd10, xf10, xff10,
rgd50, rtd50, rgd150, rtd150, rgd250, rtd250, rgd350, rtd350,
rgf50, rtf50, rgf150, rtf150, rgf250, rtf250, rgf350, rtf350,
rgff50, rtff50, rgff150, rtff150, rgff250, rtff250, rgff350, rtff350,
sd50, sf50, sff50, dd50, df50, dff50, ddt50, dft50, dfft50, xd50, xf50, xff50))
Plot.
# Human set quantified with Salmon.
hs <- ggplot(mega[quantification=='Salmon' & species=='Human', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated human dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+HS.gif", hs)
hs2 <- ggplot(mega[quantification=='Salmon' & species=='Human' & tool != 'DEXsgr', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated human dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+HS-.gif", hs2)
# Fly set quantified with Salmon.
ds <- ggplot(mega[quantification=='Salmon' & species=='Fruitfly', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated fruitfly dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+DS.gif", ds)
ds2 <- ggplot(mega[quantification=='Salmon' & species=='Fruitfly' & tool != 'DEXsgr', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated fruitfly dataset quantified with Salmon)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+DS-.gif", ds2)
# Human set quantified with Kallisto.
hk <- ggplot(mega[quantification=='Kallisto' & species=='Human', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated human dataset quantified with Kallisto)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+HK.gif", hk)
# Fruitfly set quantified with Kallisto.
dk <- ggplot(mega[quantification=='Kallisto' & species=='Fruitfly', ]) + gth3 +
facet_grid(type ~ tool, switch='y', scale="free_y") +
transition_states(prefilter, transition_length = 0.5, state_length = 3) +
geom_point(aes(x=effect, y=value, colour=reproducibility, shape=tool), shape=20) +
scale_y_continuous(expand=c(0,0), breaks=c(0, 0.2, 0.4, 0.6, 0.8, 1), sec.axis=dup_axis(name='')) +
coord_cartesian(ylim=c(0, 1)) +
scale_colour_manual(values=c('Qrep NA Rrep NA' = 'blue',
'Qrep >= 0.95 Rrep >= 0.85' = 'darkred',
'Qrep >= 0.9 Rrep >= 0.75' = 'red',
'Qrep >= 0.85 Rrep >= 0.65' = 'orange',
'Qrep >= 0.8 Rrep >= 0.55' = 'gold')) +
labs(title='DTU performance at typical significance level of 0.05', subtitle='(simulated fruitfly dataset quantified with Kallisto)', y="Abundance prefilter : {closest_state}") +
guides(colour=guide_legend(nrow=2)) +
theme(legend.position = "bottom")
anim_save("analysis_sim_v3_perf+DK.gif", dk)
The inclusion of the excluded genes as additional negative cases does not alter the result significantly.
Without prefilter
tallyh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
sensh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
fdrh = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
mcch = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 0)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.612 D >= 0.2 Sensitivity
## 2 DEXSeq 0.570 D >= 0.2 Sensitivity
## 3 RATs(g) 0.563 D >= 0.2 Sensitivity
## 4 RATs(t) 0.549 D >= 0.2 Sensitivity
## 5 DRIMSeq(t) 0.513 D >= 0.2 Sensitivity
## 6 DRIMSeq(g) 0.505 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.028 D >= 0.2 FDR
## 2 DRIMSeq(t) 0.033 D >= 0.2 FDR
## 3 DEXSeq 0.039 D >= 0.2 FDR
## 4 RATs(g) 0.049 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.068 D >= 0.2 FDR
## 6 SUPPA2 0.358 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.737 D >= 0.2 MCC
## 2 RATs(g) 0.727 D >= 0.2 MCC
## 3 RATs(t) 0.727 D >= 0.2 MCC
## 4 DRIMSeq(t) 0.689 D >= 0.2 MCC
## 5 DRIMSeq(g) 0.678 D >= 0.2 MCC
## 6 SUPPA2 0.621 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.788 D >= 0.1 Sensitivity
## 2 RATs(g) 0.716 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DEXSeq 0.599 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.558 D >= 0.1 Sensitivity
## 6 DRIMSeq(g) 0.555 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.067 D >= 0.1 FDR
## 2 DEXSeq 0.076 D >= 0.1 FDR
## 3 RATs(t) 0.077 D >= 0.1 FDR
## 4 DRIMSeq(g) 0.120 D >= 0.1 FDR
## 5 RATs(g) 0.291 D >= 0.1 FDR
## 6 SUPPA2 0.525 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.796 D >= 0.1 MCC
## 2 DEXSeq 0.741 D >= 0.1 MCC
## 3 DRIMSeq(t) 0.706 D >= 0.1 MCC
## 4 RATs(g) 0.705 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.690 D >= 0.1 MCC
## 6 SUPPA2 0.603 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.871 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.777 D >= 0.05 Sensitivity
## 4 DEXSeq 0.602 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.574 D >= 0.05 Sensitivity
## 6 DRIMSeq(g) 0.566 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.110 D >= 0.05 FDR
## 2 DEXSeq 0.124 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.177 D >= 0.05 FDR
## 4 RATs(t) 0.314 D >= 0.05 FDR
## 5 SUPPA2 0.654 D >= 0.05 FDR
## 6 RATs(g) 0.673 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.726 D >= 0.05 MCC
## 2 DEXSeq 0.723 D >= 0.05 MCC
## 3 DRIMSeq(t) 0.696 D >= 0.05 MCC
## 4 DRIMSeq(g) 0.672 D >= 0.05 MCC
## 5 SUPPA2 0.538 D >= 0.05 MCC
## 6 RATs(g) 0.513 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Prefilter > 10
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 10)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.608 D >= 0.2 Sensitivity
## 2 DRIMSeq(g) 0.596 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.589 D >= 0.2 Sensitivity
## 4 DEXSeq 0.567 D >= 0.2 Sensitivity
## 5 RATs(g) 0.563 D >= 0.2 Sensitivity
## 6 RATs(t) 0.549 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.018 D >= 0.2 FDR
## 2 RATs(g) 0.023 D >= 0.2 FDR
## 3 DEXSeq 0.032 D >= 0.2 FDR
## 4 SUPPA2 0.050 D >= 0.2 FDR
## 5 DRIMSeq(t) 0.058 D >= 0.2 FDR
## 6 DRIMSeq(g) 0.078 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.757 D >= 0.2 MCC
## 2 DEXSeq 0.738 D >= 0.2 MCC
## 3 RATs(g) 0.737 D >= 0.2 MCC
## 4 RATs(t) 0.731 D >= 0.2 MCC
## 5 DRIMSeq(g) 0.731 D >= 0.2 MCC
## 6 DRIMSeq(t) 0.726 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.787 D >= 0.1 Sensitivity
## 2 RATs(g) 0.716 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DRIMSeq(g) 0.651 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.641 D >= 0.1 Sensitivity
## 6 DEXSeq 0.599 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.063 D >= 0.1 FDR
## 2 RATs(t) 0.065 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.115 D >= 0.1 FDR
## 4 DRIMSeq(g) 0.183 D >= 0.1 FDR
## 5 RATs(g) 0.220 D >= 0.1 FDR
## 6 SUPPA2 0.260 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.802 D >= 0.1 MCC
## 2 SUPPA2 0.759 D >= 0.1 MCC
## 3 DEXSeq 0.746 D >= 0.1 MCC
## 4 RATs(g) 0.741 D >= 0.1 MCC
## 5 DRIMSeq(t) 0.732 D >= 0.1 MCC
## 6 DRIMSeq(g) 0.717 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.878 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.778 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.656 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.649 D >= 0.05 Sensitivity
## 6 DEXSeq 0.601 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.110 D >= 0.05 FDR
## 2 DRIMSeq(t) 0.176 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.272 D >= 0.05 FDR
## 4 RATs(t) 0.310 D >= 0.05 FDR
## 5 SUPPA2 0.565 D >= 0.05 FDR
## 6 RATs(g) 0.658 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.728 D >= 0.05 MCC
## 2 DEXSeq 0.728 D >= 0.05 MCC
## 3 DRIMSeq(t) 0.707 D >= 0.05 MCC
## 4 DRIMSeq(g) 0.676 D >= 0.05 MCC
## 5 SUPPA2 0.610 D >= 0.05 MCC
## 6 RATs(g) 0.526 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Prefilter > 50
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Human') %>%
filter(prefilter == 50)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.632 D >= 0.2 Sensitivity
## 2 DRIMSeq(t) 0.616 D >= 0.2 Sensitivity
## 3 SUPPA2 0.600 D >= 0.2 Sensitivity
## 4 RATs(g) 0.562 D >= 0.2 Sensitivity
## 5 RATs(t) 0.549 D >= 0.2 Sensitivity
## 6 DEXSeq 0.523 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.007 D >= 0.2 FDR
## 2 RATs(g) 0.009 D >= 0.2 FDR
## 3 DRIMSeq(t) 0.039 D >= 0.2 FDR
## 4 DRIMSeq(g) 0.051 D >= 0.2 FDR
## 5 DEXSeq 0.056 D >= 0.2 FDR
## 6 SUPPA2 0.059 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.763 D >= 0.2 MCC
## 2 SUPPA2 0.750 D >= 0.2 MCC
## 3 DRIMSeq(t) 0.747 D >= 0.2 MCC
## 4 RATs(g) 0.742 D >= 0.2 MCC
## 5 RATs(t) 0.735 D >= 0.2 MCC
## 6 DEXSeq 0.699 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.776 D >= 0.1 Sensitivity
## 2 RATs(g) 0.715 D >= 0.1 Sensitivity
## 3 RATs(t) 0.692 D >= 0.1 Sensitivity
## 4 DRIMSeq(g) 0.678 D >= 0.1 Sensitivity
## 5 DRIMSeq(t) 0.665 D >= 0.1 Sensitivity
## 6 DEXSeq 0.552 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.040 D >= 0.1 FDR
## 2 RATs(g) 0.081 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.087 D >= 0.1 FDR
## 4 DEXSeq 0.094 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.131 D >= 0.1 FDR
## 6 SUPPA2 0.152 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.813 D >= 0.1 MCC
## 2 SUPPA2 0.810 D >= 0.1 MCC
## 3 RATs(g) 0.807 D >= 0.1 MCC
## 4 DRIMSeq(t) 0.755 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.754 D >= 0.1 MCC
## 6 DEXSeq 0.704 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.902 D >= 0.05 Sensitivity
## 2 RATs(g) 0.864 D >= 0.05 Sensitivity
## 3 RATs(t) 0.777 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.683 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.671 D >= 0.05 Sensitivity
## 6 DEXSeq 0.555 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
sensh[as.character(tmp2$tool[i])] = sensh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.128 D >= 0.05 FDR
## 2 DEXSeq 0.150 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.183 D >= 0.05 FDR
## 4 RATs(t) 0.314 D >= 0.05 FDR
## 5 SUPPA2 0.566 D >= 0.05 FDR
## 6 RATs(g) 0.673 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
fdrh[as.character(tmp2$tool[i])] = fdrh[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.738 D >= 0.05 MCC
## 2 DRIMSeq(g) 0.732 D >= 0.05 MCC
## 3 RATs(t) 0.726 D >= 0.05 MCC
## 4 DEXSeq 0.683 D >= 0.05 MCC
## 5 SUPPA2 0.623 D >= 0.05 MCC
## 6 RATs(g) 0.513 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyh[as.character(tmp2$tool[i])] = tallyh[as.character(tmp2$tool[i])] + i
mcch[as.character(tmp2$tool[i])] = mcch[as.character(tmp2$tool[i])] + i
}
Cumulative rank
print( data.frame(Overall = tallyh,
Sensitivity = sensh,
FDR = fdrh,
MCC = mcch) )
## Overall Sensitivity FDR MCC
## RATs(t) 74 33 21 20
## RATs(g) 100 24 38 38
## DRIMSeq(t) 93 40 21 32
## DRIMSeq(g) 111 37 37 37
## SUPPA2 94 11 49 34
## DEXSeq 95 44 23 28
Without prefilter
tallyd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
sensd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
fdrd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
mccd = c('RATs(t)'=0, 'RATs(g)'=0, 'DRIMSeq(t)'=0, 'DRIMSeq(g)'=0, 'SUPPA2'=0, 'DEXSeq'=0)
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 0)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.615 D >= 0.2 Sensitivity
## 2 DRIMSeq(g) 0.601 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.595 D >= 0.2 Sensitivity
## 4 DEXSeq 0.577 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.009 D >= 0.2 FDR
## 2 RATs(g) 0.015 D >= 0.2 FDR
## 3 DEXSeq 0.015 D >= 0.2 FDR
## 4 DRIMSeq(t) 0.029 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.036 D >= 0.2 FDR
## 6 SUPPA2 0.128 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.743 D >= 0.2 MCC
## 2 DRIMSeq(g) 0.741 D >= 0.2 MCC
## 3 DRIMSeq(t) 0.721 D >= 0.2 MCC
## 4 RATs(g) 0.719 D >= 0.2 MCC
## 5 SUPPA2 0.718 D >= 0.2 MCC
## 6 RATs(t) 0.710 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.765 D >= 0.1 Sensitivity
## 2 RATs(g) 0.666 D >= 0.1 Sensitivity
## 3 RATs(t) 0.653 D >= 0.1 Sensitivity
## 4 DRIMSeq(t) 0.649 D >= 0.1 Sensitivity
## 5 DRIMSeq(g) 0.640 D >= 0.1 Sensitivity
## 6 DEXSeq 0.616 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.024 D >= 0.1 FDR
## 2 DEXSeq 0.033 D >= 0.1 FDR
## 3 RATs(g) 0.043 D >= 0.1 FDR
## 4 DRIMSeq(t) 0.056 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.071 D >= 0.1 FDR
## 6 SUPPA2 0.223 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.792 D >= 0.1 MCC
## 2 RATs(t) 0.788 D >= 0.1 MCC
## 3 DEXSeq 0.761 D >= 0.1 MCC
## 4 SUPPA2 0.755 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.750 D >= 0.1 MCC
## 6 DRIMSeq(t) 0.743 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.801 D >= 0.05 Sensitivity
## 2 SUPPA2 0.786 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(t) 0.652 D >= 0.05 Sensitivity
## 5 DRIMSeq(g) 0.642 D >= 0.05 Sensitivity
## 6 DEXSeq 0.621 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DEXSeq 0.052 D >= 0.05 FDR
## 2 DRIMSeq(t) 0.072 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.099 D >= 0.05 FDR
## 4 RATs(t) 0.118 D >= 0.05 FDR
## 5 RATs(g) 0.222 D >= 0.05 FDR
## 6 SUPPA2 0.248 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.804 D >= 0.05 MCC
## 2 RATs(g) 0.781 D >= 0.05 MCC
## 3 DEXSeq 0.756 D >= 0.05 MCC
## 4 SUPPA2 0.752 D >= 0.05 MCC
## 5 DRIMSeq(g) 0.738 D >= 0.05 MCC
## 6 DRIMSeq(t) 0.736 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Prefilter > 10
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 10)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.635 D >= 0.2 Sensitivity
## 2 SUPPA2 0.618 D >= 0.2 Sensitivity
## 3 DRIMSeq(t) 0.614 D >= 0.2 Sensitivity
## 4 DEXSeq 0.588 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.008 D >= 0.2 FDR
## 2 RATs(g) 0.009 D >= 0.2 FDR
## 3 DEXSeq 0.018 D >= 0.2 FDR
## 4 SUPPA2 0.026 D >= 0.2 FDR
## 5 DRIMSeq(t) 0.029 D >= 0.2 FDR
## 6 DRIMSeq(g) 0.033 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.762 D >= 0.2 MCC
## 2 SUPPA2 0.761 D >= 0.2 MCC
## 3 DEXSeq 0.749 D >= 0.2 MCC
## 4 DRIMSeq(t) 0.727 D >= 0.2 MCC
## 5 RATs(g) 0.722 D >= 0.2 MCC
## 6 RATs(t) 0.711 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.772 D >= 0.1 Sensitivity
## 2 DRIMSeq(g) 0.683 D >= 0.1 Sensitivity
## 3 DRIMSeq(t) 0.682 D >= 0.1 Sensitivity
## 4 RATs(g) 0.666 D >= 0.1 Sensitivity
## 5 DEXSeq 0.663 D >= 0.1 Sensitivity
## 6 RATs(t) 0.653 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.021 D >= 0.1 FDR
## 2 RATs(g) 0.036 D >= 0.1 FDR
## 3 DEXSeq 0.058 D >= 0.1 FDR
## 4 DRIMSeq(t) 0.066 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.073 D >= 0.1 FDR
## 6 SUPPA2 0.136 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.801 D >= 0.1 MCC
## 2 RATs(g) 0.795 D >= 0.1 MCC
## 3 RATs(t) 0.789 D >= 0.1 MCC
## 4 DEXSeq 0.779 D >= 0.1 MCC
## 5 DRIMSeq(g) 0.773 D >= 0.1 MCC
## 6 DRIMSeq(t) 0.753 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.801 D >= 0.05 Sensitivity
## 2 SUPPA2 0.795 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.685 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.684 D >= 0.05 Sensitivity
## 6 DEXSeq 0.670 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.086 D >= 0.05 FDR
## 2 DEXSeq 0.088 D >= 0.05 FDR
## 3 DRIMSeq(g) 0.098 D >= 0.05 FDR
## 4 RATs(t) 0.116 D >= 0.05 FDR
## 5 SUPPA2 0.171 D >= 0.05 FDR
## 6 RATs(g) 0.218 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.805 D >= 0.05 MCC
## 2 SUPPA2 0.794 D >= 0.05 MCC
## 3 RATs(g) 0.783 D >= 0.05 MCC
## 4 DEXSeq 0.769 D >= 0.05 MCC
## 5 DRIMSeq(g) 0.761 D >= 0.05 MCC
## 6 DRIMSeq(t) 0.742 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Prefilter > 50
tmp <- mega %>%
filter(tool != 'DEXsgr') %>%
filter(!reproducibility %in% c('Qrep >= 0.9 Rrep >= 0.75', 'Qrep >= 0.85 Rrep >= 0.65', 'Qrep >= 0.8 Rrep >= 0.55')) %>%
filter(quantification == 'Salmon') %>%
filter(species == 'Fruitfly') %>%
filter(prefilter == 50)
# D>=0.2
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(g) 0.647 D >= 0.2 Sensitivity
## 2 DRIMSeq(t) 0.631 D >= 0.2 Sensitivity
## 3 SUPPA2 0.628 D >= 0.2 Sensitivity
## 4 DEXSeq 0.556 D >= 0.2 Sensitivity
## 5 RATs(g) 0.535 D >= 0.2 Sensitivity
## 6 RATs(t) 0.526 D >= 0.2 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(g) 0.007 D >= 0.2 FDR
## 2 RATs(t) 0.008 D >= 0.2 FDR
## 3 SUPPA2 0.012 D >= 0.2 FDR
## 4 DRIMSeq(t) 0.024 D >= 0.2 FDR
## 5 DRIMSeq(g) 0.026 D >= 0.2 FDR
## 6 DEXSeq 0.030 D >= 0.2 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.2') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.777 D >= 0.2 MCC
## 2 DRIMSeq(g) 0.766 D >= 0.2 MCC
## 3 DRIMSeq(t) 0.728 D >= 0.2 MCC
## 4 DEXSeq 0.723 D >= 0.2 MCC
## 5 RATs(g) 0.722 D >= 0.2 MCC
## 6 RATs(t) 0.711 D >= 0.2 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.1
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.781 D >= 0.1 Sensitivity
## 2 DRIMSeq(g) 0.703 D >= 0.1 Sensitivity
## 3 DRIMSeq(t) 0.700 D >= 0.1 Sensitivity
## 4 RATs(g) 0.666 D >= 0.1 Sensitivity
## 5 RATs(t) 0.653 D >= 0.1 Sensitivity
## 6 DEXSeq 0.612 D >= 0.1 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 RATs(t) 0.017 D >= 0.1 FDR
## 2 RATs(g) 0.023 D >= 0.1 FDR
## 3 DRIMSeq(t) 0.054 D >= 0.1 FDR
## 4 DEXSeq 0.058 D >= 0.1 FDR
## 5 DRIMSeq(g) 0.061 D >= 0.1 FDR
## 6 SUPPA2 0.070 D >= 0.1 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.1') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.843 D >= 0.1 MCC
## 2 RATs(g) 0.801 D >= 0.1 MCC
## 3 RATs(t) 0.791 D >= 0.1 MCC
## 4 DRIMSeq(g) 0.784 D >= 0.1 MCC
## 5 DRIMSeq(t) 0.757 D >= 0.1 MCC
## 6 DEXSeq 0.747 D >= 0.1 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
# D>=0.05
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'Sensitivity') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.817 D >= 0.05 Sensitivity
## 2 RATs(g) 0.801 D >= 0.05 Sensitivity
## 3 RATs(t) 0.754 D >= 0.05 Sensitivity
## 4 DRIMSeq(g) 0.704 D >= 0.05 Sensitivity
## 5 DRIMSeq(t) 0.702 D >= 0.05 Sensitivity
## 6 DEXSeq 0.616 D >= 0.05 Sensitivity
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
sensd[as.character(tmp2$tool[i])] = sensd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'FDR') %>%
arrange(value) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 DRIMSeq(t) 0.063 D >= 0.05 FDR
## 2 DRIMSeq(g) 0.076 D >= 0.05 FDR
## 3 DEXSeq 0.083 D >= 0.05 FDR
## 4 RATs(t) 0.118 D >= 0.05 FDR
## 5 SUPPA2 0.149 D >= 0.05 FDR
## 6 RATs(g) 0.222 D >= 0.05 FDR
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
fdrd[as.character(tmp2$tool[i])] = fdrd[as.character(tmp2$tool[i])] + i
}
tmp2 <- tmp %>%
filter(effect == 'D >= 0.05') %>%
filter(type == 'MCC') %>%
arrange(desc(value)) %>%
dplyr::select(tool, value, effect, type)
print(tmp2)
## tool value effect type
## 1 SUPPA2 0.822 D >= 0.05 MCC
## 2 RATs(t) 0.804 D >= 0.05 MCC
## 3 RATs(g) 0.781 D >= 0.05 MCC
## 4 DRIMSeq(g) 0.777 D >= 0.05 MCC
## 5 DRIMSeq(t) 0.753 D >= 0.05 MCC
## 6 DEXSeq 0.739 D >= 0.05 MCC
for (i in 1:nrow(tmp2)) {
tallyd[as.character(tmp2$tool[i])] = tallyd[as.character(tmp2$tool[i])] + i
mccd[as.character(tmp2$tool[i])] = mccd[as.character(tmp2$tool[i])] + i
}
Cumulative rank
Cumulative rank
print( data.frame(Overall = tallyd,
Sensitivity = sensd,
FDR = fdrd,
MCC = mccd) )
## Overall Sensitivity FDR MCC
## RATs(t) 90 41 19 30
## RATs(g) 85 29 29 27
## DRIMSeq(t) 104 32 28 44
## DRIMSeq(g) 98 26 39 33
## SUPPA2 82 14 47 21
## DEXSeq 108 47 27 34
tmp <- data.frame(Cumulative_Rank= c(tallyh + tallyd, sensh + sensd, fdrh + fdrd, mcch + mccd),
Type=factor(rep(c('Overall', 'Sensitivity', 'FDR', 'MCC'), each=6), levels=c('Overall', 'Sensitivity', 'FDR', 'MCC')),
Tool=c(names(tallyh), names(sensh), names(fdrh), names(mcch)) )
p <-ggplot(tmp, aes(x=Tool, fill=Type, y=Cumulative_Rank)) + gth +
geom_bar(stat='identity', position='dodge') +
scale_y_reverse() +
scale_x_discrete(position = "top") +
scale_fill_manual(values=c(Overall='grey70', Sensitivity='orange', FDR='red', MCC='blue'))
print(p)